From 08a8d9f2a775bcf002889983fefcba6fc7b18d5f Mon Sep 17 00:00:00 2001 From: James Phillips Date: Wed, 26 Jul 2017 22:09:19 -0700 Subject: [PATCH] command/lock: Add -child-exitcode, return 2 on child error (#3329) * Exit 2 if -child-exit-code and the child returned with an error. * There is no platform independent way to check the exact return code of * the child, so on error always return 2. * Closes #947 * Closes #1503 --- command/lock.go | 41 ++++++++++++------- command/lock_test.go | 30 ++++++++++++++ .../docs/commands/lock.html.markdown.erb | 7 ++++ 3 files changed, 64 insertions(+), 14 deletions(-) diff --git a/command/lock.go b/command/lock.go index f7198fc6e1..4177186c2e 100644 --- a/command/lock.go +++ b/command/lock.go @@ -41,8 +41,7 @@ type LockCommand struct { child *os.Process childLock sync.Mutex - - verbose bool + verbose bool } func (c *LockCommand) Help() string { @@ -75,14 +74,18 @@ func (c *LockCommand) Run(args []string) int { } func (c *LockCommand) run(args []string, lu **LockUnlock) int { - var childDone chan struct{} var limit int var monitorRetry int var name string var passStdin bool + var propagateChildCode bool var timeout time.Duration f := c.BaseCommand.NewFlagSet(c) + f.BoolVar(&propagateChildCode, "child-exit-code", false, + "Exit 2 if the child process exited with an error if this is true, "+ + "otherwise this doesn't propagate an error from the child. The "+ + "default value is false.") f.IntVar(&limit, "n", 1, "Optional limit on the number of concurrent lock holders. The underlying "+ "implementation switches from a lock to a semaphore when the value is "+ @@ -100,7 +103,7 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int { "Pass stdin to the child process.") f.DurationVar(&timeout, "timeout", 0, "Maximum amount of time to wait to acquire the lock, specified as a "+ - "timestamp like \"1s\" or \"3h\". The default value is 0.") + "duration like \"1s\" or \"3h\". The default value is 0.") f.BoolVar(&c.verbose, "verbose", false, "Enable verbose (debugging) output.") @@ -185,6 +188,8 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int { } // Check if we were shutdown but managed to still acquire the lock + var childCode int + var childErr chan error select { case <-c.ShutdownCh: c.UI.Error("Shutdown triggered during lock acquisition") @@ -193,11 +198,9 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int { } // Start the child process - childDone = make(chan struct{}) + childErr = make(chan error, 1) go func() { - if err := c.startChild(script, childDone, passStdin); err != nil { - c.UI.Error(fmt.Sprintf("%s", err)) - } + childErr <- c.startChild(script, passStdin) }() // Monitor for shutdown, child termination, or lock loss @@ -210,7 +213,10 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int { if c.verbose { c.UI.Info("Lock lost, killing child") } - case <-childDone: + case err := <-childErr: + if err != nil { + childCode = 2 + } if c.verbose { c.UI.Info("Child terminated, releasing lock") } @@ -220,8 +226,9 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int { // Prevent starting a new child. The lock is never released // after this point. c.childLock.Lock() + // Kill any existing child - if err := c.killChild(childDone); err != nil { + if err := c.killChild(childErr); err != nil { c.UI.Error(fmt.Sprintf("%s", err)) } @@ -243,6 +250,13 @@ RELEASE: } else if c.verbose { c.UI.Info("Cleanup succeeded") } + + // If we detected an error from the child process then we propagate + // that. + if propagateChildCode { + return childCode + } + return 0 } @@ -321,8 +335,7 @@ func (c *LockCommand) setupSemaphore(client *api.Client, limit int, prefix, name // startChild is a long running routine used to start and // wait for the child process to exit. -func (c *LockCommand) startChild(script string, doneCh chan struct{}, passStdin bool) error { - defer close(doneCh) +func (c *LockCommand) startChild(script string, passStdin bool) error { if c.verbose { c.UI.Info(fmt.Sprintf("Starting handler '%s'", script)) } @@ -373,7 +386,7 @@ func (c *LockCommand) startChild(script string, doneCh chan struct{}, passStdin // termination. // On Windows, the child is always hard terminated with a SIGKILL, even // on the first attempt. -func (c *LockCommand) killChild(childDone chan struct{}) error { +func (c *LockCommand) killChild(childErr chan error) error { // Get the child process child := c.child @@ -395,7 +408,7 @@ func (c *LockCommand) killChild(childDone chan struct{}) error { // Wait for termination, or until a timeout select { - case <-childDone: + case <-childErr: if c.verbose { c.UI.Info("Child terminated") } diff --git a/command/lock_test.go b/command/lock_test.go index 27e8d36167..d6f028e84c 100644 --- a/command/lock_test.go +++ b/command/lock_test.go @@ -257,3 +257,33 @@ func TestLockCommand_MonitorRetry_Semaphore_Arg(t *testing.T) { t.Fatalf("bad: %#v", opts) } } + +func TestLockCommand_ChildExitCode(t *testing.T) { + t.Parallel() + a := agent.NewTestAgent(t.Name(), nil) + defer a.Shutdown() + + t.Run("clean exit", func(t *testing.T) { + _, c := testLockCommand(t) + args := []string{"-http-addr=" + a.HTTPAddr(), "-child-exit-code", "test/prefix", "exit 0"} + if got, want := c.Run(args), 0; got != want { + t.Fatalf("got %d want %d", got, want) + } + }) + + t.Run("error exit", func(t *testing.T) { + _, c := testLockCommand(t) + args := []string{"-http-addr=" + a.HTTPAddr(), "-child-exit-code", "test/prefix", "exit 1"} + if got, want := c.Run(args), 2; got != want { + t.Fatalf("got %d want %d", got, want) + } + }) + + t.Run("not propagated", func(t *testing.T) { + _, c := testLockCommand(t) + args := []string{"-http-addr=" + a.HTTPAddr(), "test/prefix", "exit 1"} + if got, want := c.Run(args), 0; got != want { + t.Fatalf("got %d want %d", got, want) + } + }) +} diff --git a/website/source/docs/commands/lock.html.markdown.erb b/website/source/docs/commands/lock.html.markdown.erb index 1b9739a7ff..18f9103e57 100644 --- a/website/source/docs/commands/lock.html.markdown.erb +++ b/website/source/docs/commands/lock.html.markdown.erb @@ -52,6 +52,10 @@ Windows has no POSIX compatible notion for `SIGTERM`. #### Command Options +* `-child-exit-code` - Exit 2 if the child process exited with an error + if this is true, otherwise this doesn't propagate an error from the + child. The default value is false. + * `-monitor-retry` - Retry up to this number of times if Consul returns a 500 error while monitoring the lock. This allows riding out brief periods of unavailability without causing leader elections, but increases the amount of time required @@ -67,6 +71,9 @@ Windows has no POSIX compatible notion for `SIGTERM`. * `-pass-stdin` - Pass stdin to child process. +* `timeout` - Maximum amount of time to wait to acquire the lock, specified + as a duration like `1s` or `3h`. The default value is 0. + * `-try` - Attempt to acquire the lock up to the given timeout. The timeout is a positive decimal number, with unit suffix, such as "500ms". Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".