mirror of
https://github.com/status-im/consul.git
synced 2025-01-10 13:55:55 +00:00
16b19dd82d
* add config watcher to the config package * add logging to watcher * add test and refactor to add WatcherEvent. * add all API calls and fix a bug with recreated files * add tests for watcher * remove the unnecessary use of context * Add debug log and a test for file rename * use inode to detect if the file is recreated/replaced and only listen to create events. * tidy ups (#1535) * tidy ups * Add tests for inode reconcile * fix linux vs windows syscall * fix linux vs windows syscall * fix windows compile error * increase timeout * use ctime ID * remove remove/creation test as it's a use case that fail in linux * fix linux/windows to use Ino/CreationTime * fix the watcher to only overwrite current file id * fix linter error * fix remove/create test * set reconcile loop to 200 Milliseconds * fix watcher to not trigger event on remove, add more tests * on a remove event try to add the file back to the watcher and trigger the handler if success * fix race condition * fix flaky test * fix race conditions * set level to info * fix when file is removed and get an event for it after * fix to trigger handler when we get a remove but re-add fail * fix error message * add tests for directory watch and fixes * detect if a file is a symlink and return an error on Add * rename Watcher to FileWatcher and remove symlink deref * add fsnotify@v1.5.1 * fix go mod * do not reset timer on errors, rename OS specific files * rename New func * events trigger on write and rename * add missing test * fix flaking tests * fix flaky test * check reconcile when removed * delete invalid file * fix test to create files with different mod time. * back date file instead of sleeping * add watching file in agent command. 
* fix watcher call to use new API * add configuration and stop watcher when server stop * add certs as watched files * move FileWatcher to the agent start instead of the command code * stop watcher before replacing it * save watched files in agent * add add and remove interfaces to the file watcher * fix remove to not return an error * use `Add` and `Remove` to update certs files * fix tests * close events channel on the file watcher even when the context is done * extract `NotAutoReloadableRuntimeConfig` is a separate struct * fix linter errors * add Ca configs and outgoing verify to the not auto reloadable config * add some logs and fix to use background context * add tests to auto-config reload * remove stale test * add tests to changes to config files * add check to see if old cert files still trigger updates * rename `NotAutoReloadableRuntimeConfig` to `StaticRuntimeConfig` * fix to re add both key and cert file. Add test to cover this case. * review suggestion Co-authored-by: R.B. Boyer <4903+rboyer@users.noreply.github.com> * add check to static runtime config changes * fix test * add changelog file * fix review comments * Apply suggestions from code review Co-authored-by: R.B. Boyer <4903+rboyer@users.noreply.github.com> * update flag description Co-authored-by: FFMMM <FFMMM@users.noreply.github.com> * fix compilation error * add static runtime config support * fix test * fix review comments * fix log test * Update .changelog/12329.txt Co-authored-by: Dan Upton <daniel@floppy.co> * transfer tests to runtime_test.go * fix filewatcher Replace to not deadlock. * avoid having lingering locks Co-authored-by: R.B. Boyer <4903+rboyer@users.noreply.github.com> * split ReloadConfig func * fix warning message Co-authored-by: R.B. 
Boyer <4903+rboyer@users.noreply.github.com> * convert `FileWatcher` into an interface * fix compilation errors * fix tests * extract func for adding and removing files Co-authored-by: Ashwin Venkatesh <ashwin@hashicorp.com> Co-authored-by: R.B. Boyer <4903+rboyer@users.noreply.github.com> Co-authored-by: FFMMM <FFMMM@users.noreply.github.com> Co-authored-by: Daniel Upton <daniel@floppy.co>
363 lines
9.7 KiB
Go
363 lines
9.7 KiB
Go
package agent
|
|
|
|
import (
|
|
"context"
|
|
"flag"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"os/signal"
|
|
"path/filepath"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/hashicorp/go-checkpoint"
|
|
"github.com/hashicorp/go-hclog"
|
|
mcli "github.com/mitchellh/cli"
|
|
|
|
"github.com/hashicorp/consul/agent"
|
|
"github.com/hashicorp/consul/agent/config"
|
|
"github.com/hashicorp/consul/command/cli"
|
|
"github.com/hashicorp/consul/command/flags"
|
|
"github.com/hashicorp/consul/lib"
|
|
"github.com/hashicorp/consul/logging"
|
|
"github.com/hashicorp/consul/service_os"
|
|
consulversion "github.com/hashicorp/consul/version"
|
|
)
|
|
|
|
func New(ui cli.Ui) *cmd {
|
|
c := &cmd{
|
|
ui: ui,
|
|
revision: consulversion.GitCommit,
|
|
version: consulversion.Version,
|
|
versionPrerelease: consulversion.VersionPrerelease,
|
|
versionHuman: consulversion.GetHumanVersion(),
|
|
flags: flag.NewFlagSet("", flag.ContinueOnError),
|
|
}
|
|
config.AddFlags(c.flags, &c.configLoadOpts)
|
|
c.help = flags.Usage(help, c.flags)
|
|
return c
|
|
}
|
|
|
|
// AgentCommand is a Command implementation that runs a Consul agent.
|
|
// The command will not end unless a shutdown message is sent on the
|
|
// ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly
|
|
// exit.
|
|
type cmd struct {
|
|
ui cli.Ui
|
|
flags *flag.FlagSet
|
|
http *flags.HTTPFlags
|
|
help string
|
|
revision string
|
|
version string
|
|
versionPrerelease string
|
|
versionHuman string
|
|
configLoadOpts config.LoadOpts
|
|
logger hclog.InterceptLogger
|
|
}
|
|
|
|
func (c *cmd) Run(args []string) int {
|
|
code := c.run(args)
|
|
if c.logger != nil {
|
|
c.logger.Info("Exit code", "code", code)
|
|
}
|
|
return code
|
|
}
|
|
|
|
// checkpointResults is used to handler periodic results from our update checker
|
|
func (c *cmd) checkpointResults(results *checkpoint.CheckResponse, err error) {
|
|
if err != nil {
|
|
c.logger.Error("Failed to check for updates", "error", err)
|
|
return
|
|
}
|
|
if results.Outdated {
|
|
c.logger.Info("Newer Consul version available", "new_version", results.CurrentVersion, "current_version", c.version)
|
|
}
|
|
for _, alert := range results.Alerts {
|
|
switch alert.Level {
|
|
case "info":
|
|
c.logger.Info("Bulletin", "alert_level", alert.Level, "alert_message", alert.Message, "alert_URL", alert.URL)
|
|
default:
|
|
c.logger.Error("Bulletin", "alert_level", alert.Level, "alert_message", alert.Message, "alert_URL", alert.URL)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c *cmd) startupUpdateCheck(config *config.RuntimeConfig) {
|
|
version := config.Version
|
|
if config.VersionPrerelease != "" {
|
|
version += fmt.Sprintf("-%s", config.VersionPrerelease)
|
|
}
|
|
updateParams := &checkpoint.CheckParams{
|
|
Product: "consul",
|
|
Version: version,
|
|
}
|
|
if !config.DisableAnonymousSignature {
|
|
updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature")
|
|
}
|
|
|
|
// Schedule a periodic check with expected interval of 24 hours
|
|
checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults)
|
|
|
|
// Do an immediate check within the next 30 seconds
|
|
go func() {
|
|
time.Sleep(lib.RandomStagger(30 * time.Second))
|
|
c.checkpointResults(checkpoint.Check(updateParams))
|
|
}()
|
|
}
|
|
|
|
// startupJoin is invoked to handle any joins specified to take place at start time
|
|
func (c *cmd) startupJoin(agent *agent.Agent, cfg *config.RuntimeConfig) error {
|
|
if len(cfg.StartJoinAddrsLAN) == 0 {
|
|
return nil
|
|
}
|
|
|
|
c.logger.Info("Joining cluster")
|
|
// NOTE: For partitioned servers you are only capable of using start join
|
|
// to join nodes in the default partition.
|
|
n, err := agent.JoinLAN(cfg.StartJoinAddrsLAN, agent.AgentEnterpriseMeta())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
c.logger.Info("Join completed. Initial agents synced with", "agent_count", n)
|
|
return nil
|
|
}
|
|
|
|
// startupJoinWan is invoked to handle any joins -wan specified to take place at start time
|
|
func (c *cmd) startupJoinWan(agent *agent.Agent, cfg *config.RuntimeConfig) error {
|
|
if len(cfg.StartJoinAddrsWAN) == 0 {
|
|
return nil
|
|
}
|
|
|
|
c.logger.Info("Joining wan cluster")
|
|
n, err := agent.JoinWAN(cfg.StartJoinAddrsWAN)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
c.logger.Info("Join wan completed. Initial agents synced with", "agent_count", n)
|
|
return nil
|
|
}
|
|
|
|
func (c *cmd) run(args []string) int {
|
|
ui := &mcli.PrefixedUi{
|
|
OutputPrefix: "==> ",
|
|
InfoPrefix: " ",
|
|
ErrorPrefix: "==> ",
|
|
Ui: c.ui,
|
|
}
|
|
|
|
if err := c.flags.Parse(args); err != nil {
|
|
if !strings.Contains(err.Error(), "help requested") {
|
|
ui.Error(fmt.Sprintf("error parsing flags: %v", err))
|
|
}
|
|
return 1
|
|
}
|
|
if len(c.flags.Args()) > 0 {
|
|
ui.Error(fmt.Sprintf("Unexpected extra arguments: %v", c.flags.Args()))
|
|
return 1
|
|
}
|
|
|
|
// FIXME: logs should always go to stderr, but previously they were sent to
|
|
// stdout, so continue to use Stdout for now, and fix this in a future release.
|
|
logGate := &logging.GatedWriter{Writer: c.ui.Stdout()}
|
|
loader := func(source config.Source) (config.LoadResult, error) {
|
|
c.configLoadOpts.DefaultConfig = source
|
|
return config.Load(c.configLoadOpts)
|
|
}
|
|
bd, err := agent.NewBaseDeps(loader, logGate)
|
|
if err != nil {
|
|
ui.Error(err.Error())
|
|
return 1
|
|
}
|
|
c.logger = bd.Logger
|
|
agent, err := agent.New(bd)
|
|
if err != nil {
|
|
ui.Error(err.Error())
|
|
return 1
|
|
}
|
|
|
|
config := bd.RuntimeConfig
|
|
if config.Logging.LogJSON {
|
|
// Hide all non-error output when JSON logging is enabled.
|
|
ui.Ui = &cli.BasicUI{
|
|
BasicUi: mcli.BasicUi{ErrorWriter: c.ui.Stderr(), Writer: ioutil.Discard},
|
|
}
|
|
}
|
|
|
|
ui.Output("Starting Consul agent...")
|
|
|
|
segment := config.SegmentName
|
|
if config.ServerMode {
|
|
segment = "<all>"
|
|
}
|
|
ui.Info(fmt.Sprintf(" Version: '%s'", c.versionHuman))
|
|
ui.Info(fmt.Sprintf(" Node ID: '%s'", config.NodeID))
|
|
ui.Info(fmt.Sprintf(" Node name: '%s'", config.NodeName))
|
|
if ap := config.PartitionOrEmpty(); ap != "" {
|
|
ui.Info(fmt.Sprintf(" Partition: '%s'", ap))
|
|
}
|
|
ui.Info(fmt.Sprintf(" Datacenter: '%s' (Segment: '%s')", config.Datacenter, segment))
|
|
ui.Info(fmt.Sprintf(" Server: %v (Bootstrap: %v)", config.ServerMode, config.Bootstrap))
|
|
ui.Info(fmt.Sprintf(" Client Addr: %v (HTTP: %d, HTTPS: %d, gRPC: %d, DNS: %d)", config.ClientAddrs,
|
|
config.HTTPPort, config.HTTPSPort, config.GRPCPort, config.DNSPort))
|
|
ui.Info(fmt.Sprintf(" Cluster Addr: %v (LAN: %d, WAN: %d)", config.AdvertiseAddrLAN,
|
|
config.SerfPortLAN, config.SerfPortWAN))
|
|
ui.Info(fmt.Sprintf(" Encrypt: Gossip: %v, TLS-Outgoing: %v, TLS-Incoming: %v, Auto-Encrypt-TLS: %t",
|
|
config.EncryptKey != "", config.TLS.InternalRPC.VerifyOutgoing, config.TLS.InternalRPC.VerifyIncoming, config.AutoEncryptTLS || config.AutoEncryptAllowTLS))
|
|
// Enable log streaming
|
|
ui.Output("")
|
|
ui.Output("Log data will now stream in as it occurs:\n")
|
|
logGate.Flush()
|
|
|
|
// wait for signal
|
|
signalCh := make(chan os.Signal, 10)
|
|
signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
go func() {
|
|
for {
|
|
var sig os.Signal
|
|
select {
|
|
case s := <-signalCh:
|
|
sig = s
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
|
|
switch sig {
|
|
case syscall.SIGPIPE:
|
|
continue
|
|
|
|
case syscall.SIGHUP:
|
|
err := fmt.Errorf("cannot reload before agent started")
|
|
c.logger.Error("Caught", "signal", sig, "error", err)
|
|
|
|
default:
|
|
c.logger.Info("Caught", "signal", sig)
|
|
cancel()
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
|
|
err = agent.Start(ctx)
|
|
signal.Stop(signalCh)
|
|
cancel()
|
|
|
|
if err != nil {
|
|
c.logger.Error("Error starting agent", "error", err)
|
|
return 1
|
|
}
|
|
|
|
// shutdown agent before endpoints
|
|
defer agent.ShutdownEndpoints()
|
|
defer agent.ShutdownAgent()
|
|
|
|
if !config.DisableUpdateCheck && !config.DevMode {
|
|
c.startupUpdateCheck(config)
|
|
}
|
|
|
|
if err := c.startupJoin(agent, config); err != nil {
|
|
c.logger.Error(err.Error())
|
|
return 1
|
|
}
|
|
|
|
if err := c.startupJoinWan(agent, config); err != nil {
|
|
c.logger.Error(err.Error())
|
|
return 1
|
|
}
|
|
|
|
// Let the agent know we've finished registration
|
|
agent.StartSync()
|
|
|
|
c.logger.Info("Consul agent running!")
|
|
|
|
// wait for signal
|
|
signalCh = make(chan os.Signal, 10)
|
|
signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE)
|
|
|
|
for {
|
|
var sig os.Signal
|
|
select {
|
|
case s := <-signalCh:
|
|
sig = s
|
|
case <-service_os.Shutdown_Channel():
|
|
sig = os.Interrupt
|
|
case err := <-agent.RetryJoinCh():
|
|
c.logger.Error("Retry join failed", "error", err)
|
|
return 1
|
|
case <-agent.Failed():
|
|
// The deferred Shutdown method will log the appropriate error
|
|
return 1
|
|
case <-agent.ShutdownCh():
|
|
// agent is already down!
|
|
return 0
|
|
}
|
|
|
|
switch sig {
|
|
case syscall.SIGPIPE:
|
|
continue
|
|
|
|
case syscall.SIGHUP:
|
|
c.logger.Info("Caught", "signal", sig)
|
|
|
|
err := agent.ReloadConfig()
|
|
if err != nil {
|
|
c.logger.Error("Reload config failed", "error", err)
|
|
}
|
|
config = agent.GetConfig()
|
|
default:
|
|
c.logger.Info("Caught", "signal", sig)
|
|
|
|
graceful := (sig == os.Interrupt && !(config.SkipLeaveOnInt)) || (sig == syscall.SIGTERM && (config.LeaveOnTerm))
|
|
if !graceful {
|
|
c.logger.Info("Graceful shutdown disabled. Exiting")
|
|
return 1
|
|
}
|
|
|
|
c.logger.Info("Gracefully shutting down agent...")
|
|
gracefulCh := make(chan struct{})
|
|
go func() {
|
|
if err := agent.Leave(); err != nil {
|
|
c.logger.Error("Error on leave", "error", err)
|
|
return
|
|
}
|
|
close(gracefulCh)
|
|
}()
|
|
|
|
gracefulTimeout := 15 * time.Second
|
|
select {
|
|
case <-signalCh:
|
|
c.logger.Info("Caught second signal, Exiting", "signal", sig)
|
|
return 1
|
|
case <-time.After(gracefulTimeout):
|
|
c.logger.Info("Timeout on graceful leave. Exiting")
|
|
return 1
|
|
case <-gracefulCh:
|
|
c.logger.Info("Graceful exit completed")
|
|
return 0
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c *cmd) Synopsis() string {
|
|
return synopsis
|
|
}
|
|
|
|
func (c *cmd) Help() string {
|
|
return c.help
|
|
}
|
|
|
|
const (
	// synopsis is the one-line description returned by Synopsis.
	synopsis = "Runs a Consul agent"

	// help is the usage preamble that New passes to flags.Usage.
	// NOTE(review): leading indentation inside this literal may have been
	// stripped in this copy of the file; confirm against the canonical source.
	help = `
Usage: consul agent [options]

Starts the Consul agent and runs until an interrupt is received. The
agent represents a single node in a cluster.
`
)
|