From 31a310f551c36243d2aae9ed80054c200e079b81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Frank=20Schr=C3=B6der?= Date: Wed, 21 Jun 2017 06:43:55 +0200 Subject: [PATCH] agent: notify systemd after JoinLAN (#2121) This patch adds support for notifying systemd via the NOTIFY_SOCKET by sending 'READY=1' to the socket after a successful JoinLAN. Fixes #2121 --- agent/agent.go | 47 ++++++++++++++++-------- agent/agent_endpoint_test.go | 32 ++++++++++++++++ agent/systemd/notify.go | 42 +++++++++++++++++++++ website/source/docs/agent/basics.html.md | 5 +++ 4 files changed, 110 insertions(+), 16 deletions(-) create mode 100644 agent/systemd/notify.go diff --git a/agent/agent.go b/agent/agent.go index a36115b431..0564b0b6c5 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -22,6 +22,7 @@ import ( "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/consul/structs" + "github.com/hashicorp/consul/agent/systemd" "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/ipaddr" "github.com/hashicorp/consul/lib" @@ -71,6 +72,11 @@ type delegate interface { Stats() map[string]map[string]string } +// notifier is called after a successful JoinLAN. +type notifier interface { + Notify(string) error +} + // The agent is the long running process that is run on every machine. // It exposes an RPC interface that is used by the CLI to control the // agent. The agent runs the query interfaces like HTTP, DNS, and RPC. @@ -141,6 +147,9 @@ type Agent struct { shutdownCh chan struct{} shutdownLock sync.Mutex + // joinLANNotifier is called after a successful JoinLAN. + joinLANNotifier notifier + // retryJoinCh transports errors from the retry join // attempts. 
retryJoinCh chan error @@ -188,22 +197,23 @@ func New(c *Config) (*Agent, error) { } a := &Agent{ - config: c, - acls: acls, - checkReapAfter: make(map[types.CheckID]time.Duration), - checkMonitors: make(map[types.CheckID]*CheckMonitor), - checkTTLs: make(map[types.CheckID]*CheckTTL), - checkHTTPs: make(map[types.CheckID]*CheckHTTP), - checkTCPs: make(map[types.CheckID]*CheckTCP), - checkDockers: make(map[types.CheckID]*CheckDocker), - eventCh: make(chan serf.UserEvent, 1024), - eventBuf: make([]*UserEvent, 256), - reloadCh: make(chan chan error), - retryJoinCh: make(chan error), - shutdownCh: make(chan struct{}), - endpoints: make(map[string]string), - dnsAddrs: dnsAddrs, - httpAddrs: httpAddrs, + config: c, + acls: acls, + checkReapAfter: make(map[types.CheckID]time.Duration), + checkMonitors: make(map[types.CheckID]*CheckMonitor), + checkTTLs: make(map[types.CheckID]*CheckTTL), + checkHTTPs: make(map[types.CheckID]*CheckHTTP), + checkTCPs: make(map[types.CheckID]*CheckTCP), + checkDockers: make(map[types.CheckID]*CheckDocker), + eventCh: make(chan serf.UserEvent, 1024), + eventBuf: make([]*UserEvent, 256), + joinLANNotifier: &systemd.Notifier{}, + reloadCh: make(chan chan error), + retryJoinCh: make(chan error), + shutdownCh: make(chan struct{}), + endpoints: make(map[string]string), + dnsAddrs: dnsAddrs, + httpAddrs: httpAddrs, } if err := a.resolveTmplAddrs(); err != nil { return nil, err @@ -1216,6 +1226,11 @@ func (a *Agent) JoinLAN(addrs []string) (n int, err error) { a.logger.Printf("[INFO] agent: (LAN) joining: %v", addrs) n, err = a.delegate.JoinLAN(addrs) a.logger.Printf("[INFO] agent: (LAN) joined: %d Err: %v", n, err) + if err == nil && a.joinLANNotifier != nil { + if notifErr := a.joinLANNotifier.Notify(systemd.Ready); notifErr != nil { + a.logger.Printf("[DEBUG] agent: systemd notify failed: %v", notifErr) + } + } return } diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 771a45675c..fe101a4256 100644 --- 
a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -447,6 +447,38 @@ func TestAgent_Join_ACLDeny(t *testing.T) { }) } +type mockNotifier struct{ s string } + +func (n *mockNotifier) Notify(state string) error { + n.s = state + return nil +} + +func TestAgent_JoinLANNotify(t *testing.T) { + t.Parallel() + a1 := NewTestAgent(t.Name(), nil) + defer a1.Shutdown() + + cfg2 := TestConfig() + cfg2.Server = false + cfg2.Bootstrap = false + a2 := NewTestAgent(t.Name(), cfg2) + defer a2.Shutdown() + + notif := &mockNotifier{} + a1.joinLANNotifier = notif + + addr := fmt.Sprintf("127.0.0.1:%d", a2.Config.Ports.SerfLan) + _, err := a1.JoinLAN([]string{addr}) + if err != nil { + t.Fatalf("err: %v", err) + } + + if got, want := notif.s, "READY=1"; got != want { + t.Fatalf("got joinLAN notification %q want %q", got, want) + } +} + func TestAgent_Leave(t *testing.T) { t.Parallel() a1 := NewTestAgent(t.Name(), nil) diff --git a/agent/systemd/notify.go b/agent/systemd/notify.go new file mode 100644 index 0000000000..445d0ecb2d --- /dev/null +++ b/agent/systemd/notify.go @@ -0,0 +1,42 @@ +package systemd + +import ( + "errors" + "net" + "os" +) + +const ( + // magic values for systemd + // from https://www.freedesktop.org/software/systemd/man/sd_notify.html#Description + + Ready = "READY=1" + Reloading = "RELOADING=1" + Stopping = "STOPPING=1" +) + +var NotifyNoSocket = errors.New("No socket") + +// Notifier provides a method to send a message to systemd. +type Notifier struct{} + +// Notify sends a message to the init daemon. It is common to ignore the error. 
+func (n *Notifier) Notify(state string) error { + addr := &net.UnixAddr{ + Name: os.Getenv("NOTIFY_SOCKET"), + Net: "unixgram", + } + + if addr.Name == "" { + return NotifyNoSocket + } + + conn, err := net.DialUnix(addr.Net, nil, addr) + if err != nil { + return err + } + defer conn.Close() + + _, err = conn.Write([]byte(state)) + return err +} diff --git a/website/source/docs/agent/basics.html.md b/website/source/docs/agent/basics.html.md index 3ca9f1009f..a03f9f2a5c 100644 --- a/website/source/docs/agent/basics.html.md +++ b/website/source/docs/agent/basics.html.md @@ -74,6 +74,11 @@ There are several important messages that [`consul agent`](/docs/commands/agent. Consul agents in a cluster. Not all Consul agents in a cluster have to use the same port, but this address **MUST** be reachable by all other nodes. +When running under `systemd` on Linux, Consul notifies systemd by sending +`READY=1` to the `$NOTIFY_SOCKET` when a LAN join has completed. For +this either the `join` or `retry_join` option has to be set and the +service definition file has to have `Type=notify` set. + ## Stopping an Agent An agent can be stopped in two ways: gracefully or forcefully. To gracefully