From c60fd0542a78948194ed4bd3a9da784a7bf13e26 Mon Sep 17 00:00:00 2001 From: Robert Xu Date: Mon, 16 Jun 2014 17:36:12 -0400 Subject: [PATCH 01/43] Add expect bootstrap '-expect=n' mode. This allows for us to automatically bootstrap a cluster of nodes after 'n' number of server nodes join. All servers must have the same 'n' set, or they will fail to join the cluster; all servers will not join the peer set until they hit 'n' server nodes. If the raft commit index is not empty, '-expect=n' does nothing because it thinks you've already bootstrapped. Signed-off-by: Robert Xu --- command/agent/agent.go | 10 ++- command/agent/command.go | 35 +++++++- command/agent/config.go | 13 ++- command/agent/config_test.go | 17 ++++ consul/config.go | 5 ++ consul/leader.go | 58 ++++++++++++- consul/server.go | 12 +-- consul/server_test.go | 158 ++++++++++++++++++++++++++++++++++- consul/util.go | 11 ++- consul/util_test.go | 21 +++-- 10 files changed, 316 insertions(+), 24 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index 62784f8d68..4ee1a6f8f8 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -2,15 +2,16 @@ package agent import ( "fmt" - "github.com/hashicorp/consul/consul" - "github.com/hashicorp/consul/consul/structs" - "github.com/hashicorp/serf/serf" "io" "log" "net" "os" "strconv" "sync" + + "github.com/hashicorp/consul/consul" + "github.com/hashicorp/consul/consul/structs" + "github.com/hashicorp/serf/serf" ) /* @@ -171,6 +172,9 @@ func (a *Agent) consulConfig() *consul.Config { if a.config.Bootstrap { base.Bootstrap = true } + if a.config.Expect != 0 { + base.Expect = a.config.Expect + } if a.config.Protocol > 0 { base.ProtocolVersion = uint8(a.config.Protocol) } diff --git a/command/agent/command.go b/command/agent/command.go index 2c9c5e271e..cdc833bc44 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -3,10 +3,6 @@ package agent import ( "flag" "fmt" - "github.com/armon/go-metrics" - 
"github.com/hashicorp/go-syslog" - "github.com/hashicorp/logutils" - "github.com/mitchellh/cli" "io" "net" "os" @@ -16,6 +12,11 @@ import ( "strings" "syscall" "time" + + "github.com/armon/go-metrics" + "github.com/hashicorp/go-syslog" + "github.com/hashicorp/logutils" + "github.com/mitchellh/cli" ) // gracefulTimeout controls how long we wait before forcefully terminating @@ -62,6 +63,7 @@ func (c *Command) readConfig() *Config { cmdFlags.BoolVar(&cmdConfig.Server, "server", false, "run agent as server") cmdFlags.BoolVar(&cmdConfig.Bootstrap, "bootstrap", false, "enable server bootstrap mode") + cmdFlags.IntVar(&cmdConfig.Expect, "expect", 0, "enable automatic bootstrap via expect mode") cmdFlags.StringVar(&cmdConfig.ClientAddr, "client", "", "address to bind client listeners to (DNS, HTTP, RPC)") cmdFlags.StringVar(&cmdConfig.BindAddr, "bind", "", "address to bind server listeners to") @@ -127,6 +129,30 @@ func (c *Command) readConfig() *Config { return nil } + // Expect can only work when acting as a server + if config.Expect != 0 && !config.Server { + c.Ui.Error("Expect mode cannot be enabled when server mode is not enabled") + return nil + } + + // Expect & Bootstrap are mutually exclusive + if config.Expect != 0 && config.Bootstrap { + c.Ui.Error("Expect mode and Bootstrap mode are mutually exclusive") + return nil + } + + // Warn if we are in expect mode + if config.Expect != 0 { + if config.Expect == 1 { + // just use bootstrap mode + c.Ui.Error("WARNING: Expect Mode is specified as 1; this is the same as Bootstrap mode.") + config.Expect = 0 + config.Bootstrap = true + } else { + c.Ui.Error(fmt.Sprintf("WARNING: Expect Mode enabled, looking for %v servers!", config.Expect)) + } + } + // Warn if we are in bootstrap mode if config.Bootstrap { c.Ui.Error("WARNING: Bootstrap mode enabled! Do not enable unless necessary") @@ -524,6 +550,7 @@ Options: order. 
-data-dir=path Path to a data directory to store agent state -dc=east-aws Datacenter of the agent + -expect=0 Sets server to expect bootstrap mode. -join=1.2.3.4 Address of an agent to join at start time. Can be specified multiple times. -log-level=info Log level of the agent. diff --git a/command/agent/config.go b/command/agent/config.go index a5a2bc4524..c3631429a8 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -4,8 +4,6 @@ import ( "encoding/base64" "encoding/json" "fmt" - "github.com/hashicorp/consul/consul" - "github.com/mitchellh/mapstructure" "io" "net" "os" @@ -13,6 +11,9 @@ import ( "sort" "strings" "time" + + "github.com/hashicorp/consul/consul" + "github.com/mitchellh/mapstructure" ) // Ports is used to simplify the configuration by @@ -64,6 +65,10 @@ type Config struct { // permits that node to elect itself leader Bootstrap bool `mapstructure:"bootstrap"` + // Expect tries to automatically bootstrap the Consul cluster, + // by witholding peers until enough servers join. + Expect int `mapstructure:"expect"` + // Server controls if this agent acts like a Consul server, // or merely as a client. Servers have more state, take part // in leader election, etc. 
@@ -219,6 +224,7 @@ type dirEnts []os.FileInfo func DefaultConfig() *Config { return &Config{ Bootstrap: false, + Expect: 0, Server: false, Datacenter: consul.DefaultDC, Domain: "consul.", @@ -449,6 +455,9 @@ func MergeConfig(a, b *Config) *Config { if b.Bootstrap { result.Bootstrap = true } + if b.Expect != 0 { + result.Expect = b.Expect + } if b.Datacenter != "" { result.Datacenter = b.Datacenter } diff --git a/command/agent/config_test.go b/command/agent/config_test.go index b1c83d4795..0225630d08 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -93,6 +93,21 @@ func TestDecodeConfig(t *testing.T) { t.Fatalf("bad: %#v", config) } + // Expect bootstrap + input = `{"server": true, "expect": 3}` + config, err = DecodeConfig(bytes.NewReader([]byte(input))) + if err != nil { + t.Fatalf("err: %s", err) + } + + if !config.Server { + t.Fatalf("bad: %#v", config) + } + + if config.Expect != 3 { + t.Fatalf("bad: %#v", config) + } + // DNS setup input = `{"ports": {"dns": 8500}, "recursor": "8.8.8.8", "domain": "foobar"}` config, err = DecodeConfig(bytes.NewReader([]byte(input))) @@ -426,6 +441,7 @@ func TestDecodeConfig_Check(t *testing.T) { func TestMergeConfig(t *testing.T) { a := &Config{ Bootstrap: false, + Expect: 0, Datacenter: "dc1", DataDir: "/tmp/foo", DNSRecursor: "127.0.0.1:1001", @@ -444,6 +460,7 @@ func TestMergeConfig(t *testing.T) { b := &Config{ Bootstrap: true, + Expect: 3, Datacenter: "dc2", DataDir: "/tmp/bar", DNSRecursor: "127.0.0.2:1001", diff --git a/consul/config.go b/consul/config.go index 6000177a8f..ae6c482823 100644 --- a/consul/config.go +++ b/consul/config.go @@ -44,6 +44,11 @@ type Config struct { // other nodes being present Bootstrap bool + // Expect mode is used to automatically bring up a collection of + // Consul servers. This can be used to automatically bring up a collection + // of nodes. 
+ Expect int + // Datacenter is the datacenter this Consul server represents Datacenter string diff --git a/consul/leader.go b/consul/leader.go index d09f11185e..8cbc842730 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -1,13 +1,14 @@ package consul import ( + "net" + "strconv" + "time" + "github.com/armon/go-metrics" "github.com/hashicorp/consul/consul/structs" "github.com/hashicorp/raft" "github.com/hashicorp/serf/serf" - "net" - "strconv" - "time" ) const ( @@ -368,6 +369,57 @@ func (s *Server) joinConsulServer(m serf.Member, parts *serverParts) error { } } + // Or, check for possibility that expect is not the same. + if parts.Expect != 0 { + members := s.serfLAN.Members() + for _, member := range members { + valid, p := isConsulServer(member) + if valid && member.Name != m.Name && p.Expect != parts.Expect { + s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. All expect nodes should have the same value, not adding Raft peer.", m.Name, member.Name) + return nil + } + } + } + + // If we're not a bootstrapped server, we're expecting servers, + // and our raft index is zero, try to auto bootstrap. + if !s.config.Bootstrap && s.config.Expect != 0 { + if index, _ := s.raftStore.LastIndex(); index == 0 { + // do not do standard op and add peer... 
yet + count := 0 + members := s.serfLAN.Members() + for _, member := range members { + valid, p := isConsulServer(member) + if valid && member.Name != m.Name && p.Expect == parts.Expect { + count++ + if count >= s.config.Expect { + break + } + } + } + + if count >= s.config.Expect { + // we've met expected limit - add servers + s.config.RaftConfig.EnableSingleNode = false + for _, member := range members { + valid, p := isConsulServer(member) + if valid && member.Name != m.Name && p.Expect != parts.Expect { + addAddr := &net.TCPAddr{IP: member.Addr, Port: p.Port} + future := s.raft.AddPeer(addAddr) + + if err := future.Error(); err != nil && err != raft.ErrKnownPeer { + s.logger.Printf("[ERR] consul: failed to add raft peer: %v", err) + // hmm.... + } + } + } + } else { + // not enough servers yet + return nil + } + } + } + // Attempt to add as a peer var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: parts.Port} future := s.raft.AddPeer(addr) diff --git a/consul/server.go b/consul/server.go index e7dd195f34..91eafb19be 100644 --- a/consul/server.go +++ b/consul/server.go @@ -4,9 +4,6 @@ import ( "crypto/tls" "errors" "fmt" - "github.com/hashicorp/raft" - "github.com/hashicorp/raft-mdb" - "github.com/hashicorp/serf/serf" "log" "net" "net/rpc" @@ -17,6 +14,10 @@ import ( "strconv" "sync" "time" + + "github.com/hashicorp/raft" + "github.com/hashicorp/raft-mdb" + "github.com/hashicorp/serf/serf" ) // These are the protocol versions that Consul can _understand_. 
These are @@ -233,6 +234,7 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w if s.config.Bootstrap { conf.Tags["bootstrap"] = "1" } + conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect) conf.MemberlistConfig.LogOutput = s.config.LogOutput conf.LogOutput = s.config.LogOutput conf.EventCh = ch @@ -252,8 +254,8 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w // setupRaft is used to setup and initialize Raft func (s *Server) setupRaft() error { - // If we are in bootstrap mode, enable a single node cluster - if s.config.Bootstrap { + // If we are in bootstrap or expect mode, enable a single node cluster + if s.config.Bootstrap || s.config.Expect != 0 { s.config.RaftConfig.EnableSingleNode = true } diff --git a/consul/server_test.go b/consul/server_test.go index b8edc6ef48..a00f7f34a3 100644 --- a/consul/server_test.go +++ b/consul/server_test.go @@ -3,12 +3,13 @@ package consul import ( "errors" "fmt" - "github.com/hashicorp/consul/testutil" "io/ioutil" "net" "os" "testing" "time" + + "github.com/hashicorp/consul/testutil" ) var nextPort = 15000 @@ -87,6 +88,19 @@ func testServerDCBootstrap(t *testing.T, dc string, bootstrap bool) (string, *Se return dir, server } +func testServerDCExpect(t *testing.T, dc string, expect int) (string, *Server) { + name := fmt.Sprintf("Node %d", getPort()) + dir, config := testServerConfig(t, name) + config.Datacenter = dc + config.Bootstrap = false + config.Expect = expect + server, err := NewServer(config) + if err != nil { + t.Fatalf("err: %v", err) + } + return dir, server +} + func TestServer_StartStop(t *testing.T) { dir := tmpDir(t) defer os.RemoveAll(dir) @@ -304,3 +318,145 @@ func TestServer_JoinLAN_TLS(t *testing.T) { t.Fatalf("no peer established") }) } + +func TestServer_Expect(t *testing.T) { + // all test servers should be in expect=3 mode + dir1, s1 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + dir2, s2 := 
testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + + dir3, s3 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + + // Try to join + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := s2.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p1 []net.Addr + var p2 []net.Addr + + // should have no peers yet + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + // join the third node + if _, err := s3.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p3 []net.Addr + + // should now have all three peers + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 3, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 3, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p3, _ = s3.raftPeers.Peers() + return len(p3) == 3, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + +} + +func TestServer_BadExpect(t *testing.T) { + // this one is in expect=3 mode + dir1, s1 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + // this one is in expect=2 mode + dir2, s2 := testServerDCExpect(t, "dc1", 2) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + + // and this one is 
in expect=3 mode + dir3, s3 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + + // Try to join + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := s2.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p1 []net.Addr + var p2 []net.Addr + + // should have no peers yet + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + // join the third node + if _, err := s3.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p3 []net.Addr + + // should still have no peers (because s2 is in expect=2 mode) + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p3, _ = s3.raftPeers.Peers() + return len(p3) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + +} diff --git a/consul/util.go b/consul/util.go index 402ecee734..dc5b6ef2a0 100644 --- a/consul/util.go +++ b/consul/util.go @@ -4,12 +4,13 @@ import ( crand "crypto/rand" "encoding/binary" "fmt" - "github.com/hashicorp/serf/serf" "net" "os" "path/filepath" "runtime" "strconv" + + "github.com/hashicorp/serf/serf" ) /* @@ -26,6 +27,7 @@ type serverParts struct { 
Datacenter string Port int Bootstrap bool + Expect int Version int Addr net.Addr } @@ -84,6 +86,12 @@ func isConsulServer(m serf.Member) (bool, *serverParts) { datacenter := m.Tags["dc"] _, bootstrap := m.Tags["bootstrap"] + expect_str := m.Tags["expect"] + expect, err := strconv.Atoi(expect_str) + if err != nil { + return false, nil + } + port_str := m.Tags["port"] port, err := strconv.Atoi(port_str) if err != nil { @@ -103,6 +111,7 @@ func isConsulServer(m serf.Member) (bool, *serverParts) { Datacenter: datacenter, Port: port, Bootstrap: bootstrap, + Expect: expect, Addr: addr, Version: vsn, } diff --git a/consul/util_test.go b/consul/util_test.go index 65e5e99ed4..e360f523ce 100644 --- a/consul/util_test.go +++ b/consul/util_test.go @@ -1,10 +1,11 @@ package consul import ( - "github.com/hashicorp/serf/serf" "net" "regexp" "testing" + + "github.com/hashicorp/serf/serf" ) func TestStrContains(t *testing.T) { @@ -40,10 +41,11 @@ func TestIsConsulServer(t *testing.T) { Name: "foo", Addr: net.IP([]byte{127, 0, 0, 1}), Tags: map[string]string{ - "role": "consul", - "dc": "east-aws", - "port": "10000", - "vsn": "1", + "expect": "0", + "role": "consul", + "dc": "east-aws", + "port": "10000", + "vsn": "1", }, } valid, parts := isConsulServer(m) @@ -56,6 +58,9 @@ func TestIsConsulServer(t *testing.T) { if parts.Bootstrap { t.Fatalf("unexpected bootstrap") } + if parts.Expect != 0 { + t.Fatalf("bad: %v", parts.Expect) + } m.Tags["bootstrap"] = "1" valid, parts = isConsulServer(m) if !valid || !parts.Bootstrap { @@ -67,6 +72,12 @@ func TestIsConsulServer(t *testing.T) { if parts.Version != 1 { t.Fatalf("bad: %v", parts) } + m.Tags["expect"] = "3" + delete(m.Tags, "bootstrap") + valid, parts = isConsulServer(m) + if !valid || parts.Expect != 3 { + t.Fatalf("bad: %v", parts.Expect) + } } func TestIsConsulNode(t *testing.T) { From ea1c3c4ea1592b014af73ff872f32234d84b4733 Mon Sep 17 00:00:00 2001 From: Phillip Markert Date: Tue, 17 Jun 2014 09:11:13 -0400 Subject: [PATCH 
02/43] website: Fixed semantic meaning of upstream in DNS guide --- website/source/docs/guides/dns-cache.html.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/docs/guides/dns-cache.html.markdown b/website/source/docs/guides/dns-cache.html.markdown index 37d79fc476..007d3ed712 100644 --- a/website/source/docs/guides/dns-cache.html.markdown +++ b/website/source/docs/guides/dns-cache.html.markdown @@ -41,7 +41,7 @@ of the leader. ## TTL Values -TTL values can be set to allow DNS results to be cached upstream +TTL values can be set to allow DNS results to be cached downstream of Consul which can be reduce the number of lookups and to amortize the latency of doing a DNS lookup. By default, all TTLs are zero, preventing any caching. From 10c120bf9dc60d511d04e95cad8ddf308902cf72 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 17 Jun 2014 16:48:19 -0700 Subject: [PATCH 03/43] agent: Fix issues with re-registration. Fixes #216 --- command/agent/agent.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index 62784f8d68..65654ee04b 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -393,7 +393,6 @@ func (a *Agent) AddService(service *structs.NodeService, chkType *CheckType) err ServiceName: service.Service, } if err := a.AddCheck(check, chkType); err != nil { - a.state.RemoveService(service.ID) return err } } @@ -429,8 +428,8 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *CheckType) error { // Check if already registered if chkType != nil { if chkType.IsTTL() { - if _, ok := a.checkTTLs[check.CheckID]; ok { - return fmt.Errorf("CheckID is already registered") + if existing, ok := a.checkTTLs[check.CheckID]; ok { + existing.Stop() } ttl := &CheckTTL{ @@ -443,8 +442,8 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *CheckType) error { a.checkTTLs[check.CheckID] = ttl } else { - if _, ok := 
a.checkMonitors[check.CheckID]; ok { - return fmt.Errorf("CheckID is already registered") + if existing, ok := a.checkMonitors[check.CheckID]; ok { + existing.Stop() } if chkType.Interval < MinInterval { a.logger.Println(fmt.Sprintf("[WARN] agent: check '%s' has interval below minimum of %v", From bc4a4fe09b9cb0862786db4c9d34348d9ad238a4 Mon Sep 17 00:00:00 2001 From: Robert Xu Date: Wed, 18 Jun 2014 12:03:30 -0400 Subject: [PATCH 04/43] Utilise new raft.SetPeers() method, move expect logic from leader.go to serf.go. This way, we don't use EnableSingleNode, nor cause chaos adding peers. Signed-off-by: Robert Xu --- command/agent/command.go | 2 +- consul/leader.go | 51 ---------------------------------------- consul/serf.go | 39 +++++++++++++++++++++++++++++- consul/server.go | 4 ++-- 4 files changed, 41 insertions(+), 55 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index cdc833bc44..25e04dc44d 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -137,7 +137,7 @@ func (c *Command) readConfig() *Config { // Expect & Bootstrap are mutually exclusive if config.Expect != 0 && config.Bootstrap { - c.Ui.Error("Expect mode and Bootstrap mode are mutually exclusive") + c.Ui.Error("Bootstrap cannot be provided with an expected server count") return nil } diff --git a/consul/leader.go b/consul/leader.go index 8cbc842730..b63f7bbe80 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -369,57 +369,6 @@ func (s *Server) joinConsulServer(m serf.Member, parts *serverParts) error { } } - // Or, check for possibility that expect is not the same. - if parts.Expect != 0 { - members := s.serfLAN.Members() - for _, member := range members { - valid, p := isConsulServer(member) - if valid && member.Name != m.Name && p.Expect != parts.Expect { - s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. 
All expect nodes should have the same value, not adding Raft peer.", m.Name, member.Name) - return nil - } - } - } - - // If we're not a bootstrapped server, we're expecting servers, - // and our raft index is zero, try to auto bootstrap. - if !s.config.Bootstrap && s.config.Expect != 0 { - if index, _ := s.raftStore.LastIndex(); index == 0 { - // do not do standard op and add peer... yet - count := 0 - members := s.serfLAN.Members() - for _, member := range members { - valid, p := isConsulServer(member) - if valid && member.Name != m.Name && p.Expect == parts.Expect { - count++ - if count >= s.config.Expect { - break - } - } - } - - if count >= s.config.Expect { - // we've met expected limit - add servers - s.config.RaftConfig.EnableSingleNode = false - for _, member := range members { - valid, p := isConsulServer(member) - if valid && member.Name != m.Name && p.Expect != parts.Expect { - addAddr := &net.TCPAddr{IP: member.Addr, Port: p.Port} - future := s.raft.AddPeer(addAddr) - - if err := future.Error(); err != nil && err != raft.ErrKnownPeer { - s.logger.Printf("[ERR] consul: failed to add raft peer: %v", err) - // hmm.... - } - } - } - } else { - // not enough servers yet - return nil - } - } - } - // Attempt to add as a peer var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: parts.Port} future := s.raft.AddPeer(addr) diff --git a/consul/serf.go b/consul/serf.go index 11a48ee471..8a34cc9b3f 100644 --- a/consul/serf.go +++ b/consul/serf.go @@ -1,8 +1,10 @@ package consul import ( - "github.com/hashicorp/serf/serf" + "net" "strings" + + "github.com/hashicorp/serf/serf" ) const ( @@ -149,6 +151,41 @@ func (s *Server) nodeJoin(me serf.MemberEvent, wan bool) { s.localConsuls[parts.Addr.String()] = parts s.localLock.Unlock() } + + // If we're still expecting, and they are too, check servers. 
+ if s.config.Expect != 0 && parts.Expect != 0 { + index, err := s.raftStore.LastIndex() + if err == nil && index == 0 { + members := s.serfLAN.Members() + addrs := make([]net.Addr, 0) + for _, member := range members { + valid, p := isConsulServer(member) + if valid { + if p.Expect != parts.Expect { + s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. All expect nodes should have the same value, will never leave expect mode", m.Name, member.Name) + return + } else { + addrs = append(addrs, &net.TCPAddr{IP: member.Addr, Port: p.Port}) + } + } + } + + if len(addrs) >= s.config.Expect { + // we have enough nodes, set peers. + + future := s.raft.SetPeers(addrs) + + if err := future.Error(); err != nil { + s.logger.Printf("[ERR] consul: failed to leave expect mode and set peers: %v", err) + } else { + // we've left expect mode, don't enter this again + s.config.Expect = 0 + } + } + } else if err != nil { + s.logger.Printf("[ERR] consul: error retrieving index: %v", err) + } + } } } diff --git a/consul/server.go b/consul/server.go index 91eafb19be..d431ebd1df 100644 --- a/consul/server.go +++ b/consul/server.go @@ -254,8 +254,8 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w // setupRaft is used to setup and initialize Raft func (s *Server) setupRaft() error { - // If we are in bootstrap or expect mode, enable a single node cluster - if s.config.Bootstrap || s.config.Expect != 0 { + // If we are in bootstrap mode, enable a single node cluster + if s.config.Bootstrap { s.config.RaftConfig.EnableSingleNode = true } From a05e1aee15d305f83d1965adc35d8ca6511226e0 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Wed, 18 Jun 2014 10:32:19 -0700 Subject: [PATCH 05/43] agent: Fixing missing copy of RejoinAfterLeave flag. 
#110 --- command/agent/agent.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/command/agent/agent.go b/command/agent/agent.go index 65654ee04b..472adf53da 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -171,6 +171,9 @@ func (a *Agent) consulConfig() *consul.Config { if a.config.Bootstrap { base.Bootstrap = true } + if a.config.RejoinAfterLeave { + base.RejoinAfterLeave = true + } if a.config.Protocol > 0 { base.ProtocolVersion = uint8(a.config.Protocol) } From f5fa8c1d2c4b0a7b264ee661ad5ffe3f6a5ad967 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Wed, 18 Jun 2014 10:35:42 -0700 Subject: [PATCH 06/43] CHANGELOG updates --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 640c94104a..8b6834119c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +## 0.3.1 (Unreleased) + +BUG FIXES: + + * Fixed issue with service re-registration [GH-216] + * Fixed handling of `-rejoin` flag + +IMPROVEMENTS: + + * Improved handling of Serf snapshot data + * Increase reliability of failure detector + + ## 0.3.0 (June 13, 2014) FEATURES: From 7b456a6d6d045bc4741700bb4417f7601ecf616b Mon Sep 17 00:00:00 2001 From: Robert Xu Date: Wed, 18 Jun 2014 18:47:05 -0400 Subject: [PATCH 07/43] Minor cleanup to logic and testsuite. 
Signed-off-by: Robert Xu --- consul/serf.go | 2 +- consul/server.go | 4 +++- consul/server_test.go | 14 ++++++++------ consul/util.go | 12 ++++++++---- consul/util_test.go | 9 ++++----- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/consul/serf.go b/consul/serf.go index 8a34cc9b3f..c23c10c81e 100644 --- a/consul/serf.go +++ b/consul/serf.go @@ -160,7 +160,7 @@ func (s *Server) nodeJoin(me serf.MemberEvent, wan bool) { addrs := make([]net.Addr, 0) for _, member := range members { valid, p := isConsulServer(member) - if valid { + if valid && p.Datacenter == parts.Datacenter { if p.Expect != parts.Expect { s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. All expect nodes should have the same value, will never leave expect mode", m.Name, member.Name) return diff --git a/consul/server.go b/consul/server.go index d431ebd1df..d126e2343e 100644 --- a/consul/server.go +++ b/consul/server.go @@ -234,7 +234,9 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w if s.config.Bootstrap { conf.Tags["bootstrap"] = "1" } - conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect) + if s.config.Expect != 0 { + conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect) + } conf.MemberlistConfig.LogOutput = s.config.LogOutput conf.LogOutput = s.config.LogOutput conf.EventCh = ch diff --git a/consul/server_test.go b/consul/server_test.go index a00f7f34a3..109f5081b8 100644 --- a/consul/server_test.go +++ b/consul/server_test.go @@ -353,7 +353,7 @@ func TestServer_Expect(t *testing.T) { testutil.WaitForResult(func() (bool, error) { p2, _ = s2.raftPeers.Peers() - return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p2)) }, func(err error) { t.Fatalf("should have 0 peers: %v", err) }) @@ -375,18 +375,20 @@ func TestServer_Expect(t *testing.T) { testutil.WaitForResult(func() (bool, error) { p2, _ = s2.raftPeers.Peers() - return len(p2) == 3, 
errors.New(fmt.Sprintf("%v", p1)) + return len(p2) == 3, errors.New(fmt.Sprintf("%v", p2)) }, func(err error) { t.Fatalf("should have 3 peers: %v", err) }) testutil.WaitForResult(func() (bool, error) { p3, _ = s3.raftPeers.Peers() - return len(p3) == 3, errors.New(fmt.Sprintf("%v", p1)) + return len(p3) == 3, errors.New(fmt.Sprintf("%v", p3)) }, func(err error) { t.Fatalf("should have 3 peers: %v", err) }) + // check if there is one leader now + testutil.WaitForLeader(t, s1.RPC, "dc1") } func TestServer_BadExpect(t *testing.T) { @@ -425,7 +427,7 @@ func TestServer_BadExpect(t *testing.T) { testutil.WaitForResult(func() (bool, error) { p2, _ = s2.raftPeers.Peers() - return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p2)) }, func(err error) { t.Fatalf("should have 0 peers: %v", err) }) @@ -447,14 +449,14 @@ func TestServer_BadExpect(t *testing.T) { testutil.WaitForResult(func() (bool, error) { p2, _ = s2.raftPeers.Peers() - return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p2)) }, func(err error) { t.Fatalf("should have 0 peers: %v", err) }) testutil.WaitForResult(func() (bool, error) { p3, _ = s3.raftPeers.Peers() - return len(p3) == 0, errors.New(fmt.Sprintf("%v", p1)) + return len(p3) == 0, errors.New(fmt.Sprintf("%v", p3)) }, func(err error) { t.Fatalf("should have 0 peers: %v", err) }) diff --git a/consul/util.go b/consul/util.go index dc5b6ef2a0..00815ea10c 100644 --- a/consul/util.go +++ b/consul/util.go @@ -86,10 +86,14 @@ func isConsulServer(m serf.Member) (bool, *serverParts) { datacenter := m.Tags["dc"] _, bootstrap := m.Tags["bootstrap"] - expect_str := m.Tags["expect"] - expect, err := strconv.Atoi(expect_str) - if err != nil { - return false, nil + expect := 0 + expect_str, ok := m.Tags["expect"] + var err error + if ok { + expect, err = strconv.Atoi(expect_str) + if err != nil { + return false, nil + } } port_str := m.Tags["port"] diff --git 
a/consul/util_test.go b/consul/util_test.go index e360f523ce..107146b521 100644 --- a/consul/util_test.go +++ b/consul/util_test.go @@ -41,11 +41,10 @@ func TestIsConsulServer(t *testing.T) { Name: "foo", Addr: net.IP([]byte{127, 0, 0, 1}), Tags: map[string]string{ - "expect": "0", - "role": "consul", - "dc": "east-aws", - "port": "10000", - "vsn": "1", + "role": "consul", + "dc": "east-aws", + "port": "10000", + "vsn": "1", }, } valid, parts := isConsulServer(m) From 92b6e947ddf8942864d5e67c78e4e64cd13847fb Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Wed, 18 Jun 2014 16:15:28 -0700 Subject: [PATCH 08/43] consul: Minor cleanups --- consul/serf.go | 91 +++++++++++++++++++++++++++---------------- consul/server_test.go | 2 +- 2 files changed, 58 insertions(+), 35 deletions(-) diff --git a/consul/serf.go b/consul/serf.go index c23c10c81e..e31abef1a8 100644 --- a/consul/serf.go +++ b/consul/serf.go @@ -146,49 +146,72 @@ func (s *Server) nodeJoin(me serf.MemberEvent, wan bool) { s.remoteLock.Unlock() // Add to the local list as well - if !wan { + if !wan && parts.Datacenter == s.config.Datacenter { s.localLock.Lock() s.localConsuls[parts.Addr.String()] = parts s.localLock.Unlock() } - // If we're still expecting, and they are too, check servers. - if s.config.Expect != 0 && parts.Expect != 0 { - index, err := s.raftStore.LastIndex() - if err == nil && index == 0 { - members := s.serfLAN.Members() - addrs := make([]net.Addr, 0) - for _, member := range members { - valid, p := isConsulServer(member) - if valid && p.Datacenter == parts.Datacenter { - if p.Expect != parts.Expect { - s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. All expect nodes should have the same value, will never leave expect mode", m.Name, member.Name) - return - } else { - addrs = append(addrs, &net.TCPAddr{IP: member.Addr, Port: p.Port}) - } - } - } - - if len(addrs) >= s.config.Expect { - // we have enough nodes, set peers. 
- - future := s.raft.SetPeers(addrs) - - if err := future.Error(); err != nil { - s.logger.Printf("[ERR] consul: failed to leave expect mode and set peers: %v", err) - } else { - // we've left expect mode, don't enter this again - s.config.Expect = 0 - } - } - } else if err != nil { - s.logger.Printf("[ERR] consul: error retrieving index: %v", err) - } + // If we still expecting to bootstrap, may need to handle this + if s.config.Expect != 0 { + s.maybeBootstrap() } } } +// maybeBootsrap is used to handle bootstrapping when a new consul server joins +func (s *Server) maybeBootstrap() { + index, err := s.raftStore.LastIndex() + if err != nil { + s.logger.Printf("[ERR] consul: failed to read last raft index: %v", err) + return + } + + // Bootstrap can only be done if there are no committed logs, + // remove our expectations of bootstrapping + if index != 0 { + s.config.Expect = 0 + return + } + + // Scan for all the known servers + members := s.serfLAN.Members() + addrs := make([]net.Addr, 0) + for _, member := range members { + valid, p := isConsulServer(member) + if !valid { + continue + } + if p.Datacenter != s.config.Datacenter { + s.logger.Printf("[ERR] consul: Member %v has a conflicting datacenter, ignoring", member) + continue + } + if p.Expect != 0 && p.Expect != s.config.Expect { + s.logger.Printf("[ERR] consul: Member %v has a conflicting expect value. All nodes should expect the same number.", member) + return + } + if p.Bootstrap { + s.logger.Printf("[ERR] consul: Member %v has bootstrap mode. 
Expect disabled.", member) + return + } + addrs = append(addrs, &net.TCPAddr{IP: member.Addr, Port: p.Port}) + } + + // Skip if we haven't met the minimum expect count + if len(addrs) < s.config.Expect { + return + } + + // Update the peer set + s.logger.Printf("[INFO] consul: Attempting bootstrap with nodes: %v", addrs) + if err := s.raft.SetPeers(addrs).Error(); err != nil { + s.logger.Printf("[ERR] consul: failed to bootstrap peers: %v", err) + } + + // Bootstrapping comlete, don't enter this again + s.config.Expect = 0 +} + // nodeFailed is used to handle fail events on both the serf clustes func (s *Server) nodeFailed(me serf.MemberEvent, wan bool) { for _, m := range me.Members { diff --git a/consul/server_test.go b/consul/server_test.go index 109f5081b8..9ad01b4aee 100644 --- a/consul/server_test.go +++ b/consul/server_test.go @@ -329,7 +329,7 @@ func TestServer_Expect(t *testing.T) { defer os.RemoveAll(dir2) defer s2.Shutdown() - dir3, s3 := testServerDCExpect(t, "dc1", 3) + dir3, s3 := testServerDCExpect(t, "dc1", 0) defer os.RemoveAll(dir3) defer s3.Shutdown() From f3ac3ce6fcf0601b9ff4eed37ab580fcbbe95538 Mon Sep 17 00:00:00 2001 From: "Philip K. Warren" Date: Thu, 19 Jun 2014 13:53:43 -0500 Subject: [PATCH 09/43] Fix a small typo in RPC docs. --- website/source/docs/agent/rpc.html.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/docs/agent/rpc.html.markdown b/website/source/docs/agent/rpc.html.markdown index dcc3583f59..293040a99f 100644 --- a/website/source/docs/agent/rpc.html.markdown +++ b/website/source/docs/agent/rpc.html.markdown @@ -199,7 +199,7 @@ There is no request body, or special response body. ### stats -The stats command is used to provide operator information for debugginer. +The stats command is used to provide operator information for debugging. 
There is no request body, the response body looks like: ``` From 26a9edff4cfc47a4eb5833b4e2854c18b412d52c Mon Sep 17 00:00:00 2001 From: Jack Pearkes Date: Thu, 19 Jun 2014 16:59:57 -0400 Subject: [PATCH 10/43] ui: display session name on list of sessions under locked key --- ui/index.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/index.html b/ui/index.html index ce40de71c4..b8f35f0115 100644 --- a/ui/index.html +++ b/ui/index.html @@ -247,9 +247,9 @@ {{#link-to 'nodes.show' model.session.Node tagName="div" href=false class="list-group-item list-condensed-link" }}
- {{session.Node}} + {{ sessionName session }} - {{session.ID}} + {{session.Node}}
{{/link-to}} From 924e4bc7f11acf516605966e25c7ef3b5fd58bf2 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Thu, 19 Jun 2014 17:08:48 -0700 Subject: [PATCH 11/43] Rename Expect to BootstrapExpect. Fixes #223. --- command/agent/agent.go | 4 ++-- command/agent/command.go | 21 +++++++++------------ command/agent/config.go | 24 ++++++++++++------------ command/agent/config_test.go | 16 ++++++++-------- consul/config.go | 4 ++-- consul/serf.go | 10 +++++----- consul/server.go | 4 ++-- consul/server_test.go | 2 +- 8 files changed, 41 insertions(+), 44 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index 75ff489ecc..0d8cecfdf8 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -175,8 +175,8 @@ func (a *Agent) consulConfig() *consul.Config { if a.config.RejoinAfterLeave { base.RejoinAfterLeave = true } - if a.config.Expect != 0 { - base.Expect = a.config.Expect + if a.config.BootstrapExpect != 0 { + base.BootstrapExpect = a.config.BootstrapExpect } if a.config.Protocol > 0 { base.ProtocolVersion = uint8(a.config.Protocol) diff --git a/command/agent/command.go b/command/agent/command.go index 25e04dc44d..f1f7b0d0bf 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -63,7 +63,7 @@ func (c *Command) readConfig() *Config { cmdFlags.BoolVar(&cmdConfig.Server, "server", false, "run agent as server") cmdFlags.BoolVar(&cmdConfig.Bootstrap, "bootstrap", false, "enable server bootstrap mode") - cmdFlags.IntVar(&cmdConfig.Expect, "expect", 0, "enable automatic bootstrap via expect mode") + cmdFlags.IntVar(&cmdConfig.BootstrapExpect, "bootstrap-expect", 0, "enable automatic bootstrap via expect mode") cmdFlags.StringVar(&cmdConfig.ClientAddr, "client", "", "address to bind client listeners to (DNS, HTTP, RPC)") cmdFlags.StringVar(&cmdConfig.BindAddr, "bind", "", "address to bind server listeners to") @@ -130,27 +130,24 @@ func (c *Command) readConfig() *Config { } // Expect can only work when acting as a server - if 
config.Expect != 0 && !config.Server { + if config.BootstrapExpect != 0 && !config.Server { c.Ui.Error("Expect mode cannot be enabled when server mode is not enabled") return nil } // Expect & Bootstrap are mutually exclusive - if config.Expect != 0 && config.Bootstrap { + if config.BootstrapExpect != 0 && config.Bootstrap { c.Ui.Error("Bootstrap cannot be provided with an expected server count") return nil } // Warn if we are in expect mode - if config.Expect != 0 { - if config.Expect == 1 { - // just use bootstrap mode - c.Ui.Error("WARNING: Expect Mode is specified as 1; this is the same as Bootstrap mode.") - config.Expect = 0 - config.Bootstrap = true - } else { - c.Ui.Error(fmt.Sprintf("WARNING: Expect Mode enabled, looking for %v servers!", config.Expect)) - } + if config.BootstrapExpect == 1 { + c.Ui.Error("WARNING: BootstrapExpect Mode is specified as 1; this is the same as Bootstrap mode.") + config.BootstrapExpect = 0 + config.Bootstrap = true + } else if config.BootstrapExpect > 0 { + c.Ui.Error(fmt.Sprintf("WARNING: Expect Mode enabled, expecting %d servers", config.BootstrapExpect)) } // Warn if we are in bootstrap mode diff --git a/command/agent/config.go b/command/agent/config.go index c3631429a8..f08d545b18 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -65,9 +65,9 @@ type Config struct { // permits that node to elect itself leader Bootstrap bool `mapstructure:"bootstrap"` - // Expect tries to automatically bootstrap the Consul cluster, + // BootstrapExpect tries to automatically bootstrap the Consul cluster, // by witholding peers until enough servers join. - Expect int `mapstructure:"expect"` + BootstrapExpect int `mapstructure:"bootstrap_expect"` // Server controls if this agent acts like a Consul server, // or merely as a client. 
Servers have more state, take part @@ -223,14 +223,14 @@ type dirEnts []os.FileInfo // DefaultConfig is used to return a sane default configuration func DefaultConfig() *Config { return &Config{ - Bootstrap: false, - Expect: 0, - Server: false, - Datacenter: consul.DefaultDC, - Domain: "consul.", - LogLevel: "INFO", - ClientAddr: "127.0.0.1", - BindAddr: "0.0.0.0", + Bootstrap: false, + BootstrapExpect: 0, + Server: false, + Datacenter: consul.DefaultDC, + Domain: "consul.", + LogLevel: "INFO", + ClientAddr: "127.0.0.1", + BindAddr: "0.0.0.0", Ports: PortConfig{ DNS: 8600, HTTP: 8500, @@ -455,8 +455,8 @@ func MergeConfig(a, b *Config) *Config { if b.Bootstrap { result.Bootstrap = true } - if b.Expect != 0 { - result.Expect = b.Expect + if b.BootstrapExpect != 0 { + result.BootstrapExpect = b.BootstrapExpect } if b.Datacenter != "" { result.Datacenter = b.Datacenter diff --git a/command/agent/config_test.go b/command/agent/config_test.go index 0225630d08..0c6db15e1c 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -94,7 +94,7 @@ func TestDecodeConfig(t *testing.T) { } // Expect bootstrap - input = `{"server": true, "expect": 3}` + input = `{"server": true, "bootstrap_expect": 3}` config, err = DecodeConfig(bytes.NewReader([]byte(input))) if err != nil { t.Fatalf("err: %s", err) @@ -104,7 +104,7 @@ func TestDecodeConfig(t *testing.T) { t.Fatalf("bad: %#v", config) } - if config.Expect != 3 { + if config.BootstrapExpect != 3 { t.Fatalf("bad: %#v", config) } @@ -441,7 +441,7 @@ func TestDecodeConfig_Check(t *testing.T) { func TestMergeConfig(t *testing.T) { a := &Config{ Bootstrap: false, - Expect: 0, + BootstrapExpect: 0, Datacenter: "dc1", DataDir: "/tmp/foo", DNSRecursor: "127.0.0.1:1001", @@ -459,11 +459,11 @@ func TestMergeConfig(t *testing.T) { } b := &Config{ - Bootstrap: true, - Expect: 3, - Datacenter: "dc2", - DataDir: "/tmp/bar", - DNSRecursor: "127.0.0.2:1001", + Bootstrap: true, + BootstrapExpect: 3, + Datacenter: "dc2", + 
DataDir: "/tmp/bar", + DNSRecursor: "127.0.0.2:1001", DNSConfig: DNSConfig{ NodeTTL: 10 * time.Second, ServiceTTL: map[string]time.Duration{ diff --git a/consul/config.go b/consul/config.go index ae6c482823..fe4bf60010 100644 --- a/consul/config.go +++ b/consul/config.go @@ -44,10 +44,10 @@ type Config struct { // other nodes being present Bootstrap bool - // Expect mode is used to automatically bring up a collection of + // BootstrapExpect mode is used to automatically bring up a collection of // Consul servers. This can be used to automatically bring up a collection // of nodes. - Expect int + BootstrapExpect int // Datacenter is the datacenter this Consul server represents Datacenter string diff --git a/consul/serf.go b/consul/serf.go index e31abef1a8..37aae27257 100644 --- a/consul/serf.go +++ b/consul/serf.go @@ -153,7 +153,7 @@ func (s *Server) nodeJoin(me serf.MemberEvent, wan bool) { } // If we still expecting to bootstrap, may need to handle this - if s.config.Expect != 0 { + if s.config.BootstrapExpect != 0 { s.maybeBootstrap() } } @@ -170,7 +170,7 @@ func (s *Server) maybeBootstrap() { // Bootstrap can only be done if there are no committed logs, // remove our expectations of bootstrapping if index != 0 { - s.config.Expect = 0 + s.config.BootstrapExpect = 0 return } @@ -186,7 +186,7 @@ func (s *Server) maybeBootstrap() { s.logger.Printf("[ERR] consul: Member %v has a conflicting datacenter, ignoring", member) continue } - if p.Expect != 0 && p.Expect != s.config.Expect { + if p.Expect != 0 && p.Expect != s.config.BootstrapExpect { s.logger.Printf("[ERR] consul: Member %v has a conflicting expect value. 
All nodes should expect the same number.", member) return } @@ -198,7 +198,7 @@ func (s *Server) maybeBootstrap() { } // Skip if we haven't met the minimum expect count - if len(addrs) < s.config.Expect { + if len(addrs) < s.config.BootstrapExpect { return } @@ -209,7 +209,7 @@ func (s *Server) maybeBootstrap() { } // Bootstrapping comlete, don't enter this again - s.config.Expect = 0 + s.config.BootstrapExpect = 0 } // nodeFailed is used to handle fail events on both the serf clustes diff --git a/consul/server.go b/consul/server.go index d126e2343e..5f2a7635e5 100644 --- a/consul/server.go +++ b/consul/server.go @@ -234,8 +234,8 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w if s.config.Bootstrap { conf.Tags["bootstrap"] = "1" } - if s.config.Expect != 0 { - conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect) + if s.config.BootstrapExpect != 0 { + conf.Tags["expect"] = fmt.Sprintf("%d", s.config.BootstrapExpect) } conf.MemberlistConfig.LogOutput = s.config.LogOutput conf.LogOutput = s.config.LogOutput diff --git a/consul/server_test.go b/consul/server_test.go index 9ad01b4aee..70aa5811f6 100644 --- a/consul/server_test.go +++ b/consul/server_test.go @@ -93,7 +93,7 @@ func testServerDCExpect(t *testing.T, dc string, expect int) (string, *Server) { dir, config := testServerConfig(t, name) config.Datacenter = dc config.Bootstrap = false - config.Expect = expect + config.BootstrapExpect = expect server, err := NewServer(config) if err != nil { t.Fatalf("err: %v", err) From d174cbe7f461b70138cff449246ac01d2f0d290d Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Sun, 22 Jun 2014 12:49:51 -0700 Subject: [PATCH 12/43] Restore the 0.2 TLS verification behavior. Namely, don't check the DNS names in TLS certificates when connecting to other servers. 
As of golang 1.3, crypto/tls no longer natively supports doing partial verification (verifying the cert issuer but not the hostname), so we have to disable verification entirely and then do the issuer verification ourselves. Fortunately, crypto/x509 makes this relatively straightforward. If the "server_name" configuration option is passed, we preserve the existing behavior of checking that server name everywhere. No option is provided to retain the current behavior of checking the remote certificate against the local node name, since that behavior seems clearly buggy and unintentional, and I have difficulty imagining it is actually being used anywhere. It would be relatively straightforward to restore if desired, however. --- consul/client.go | 6 ++-- consul/config.go | 68 +++++++++++++++++++++++++++++++++++++++---- consul/config_test.go | 43 ++++++++++++++++++++------- consul/pool.go | 6 +++- consul/raft_rpc.go | 5 +++- consul/server.go | 9 ++---- 6 files changed, 109 insertions(+), 28 deletions(-) diff --git a/consul/client.go b/consul/client.go index 5b1ea29467..92d9231959 100644 --- a/consul/client.go +++ b/consul/client.go @@ -88,10 +88,8 @@ func NewClient(config *Config) (*Client, error) { // Create the tlsConfig var tlsConfig *tls.Config var err error - if config.VerifyOutgoing { - if tlsConfig, err = config.OutgoingTLSConfig(); err != nil { - return nil, err - } + if tlsConfig, err = config.OutgoingTLSConfig(); err != nil { + return nil, err } // Create a logger diff --git a/consul/config.go b/consul/config.go index 6000177a8f..4904880498 100644 --- a/consul/config.go +++ b/consul/config.go @@ -172,16 +172,21 @@ func (c *Config) KeyPair() (*tls.Certificate, error) { return &cert, err } -// OutgoingTLSConfig generates a TLS configuration for outgoing requests +// OutgoingTLSConfig generates a TLS configuration for outgoing +// requests. It will return a nil config if this configuration should +// not use TLS for outgoing connections. 
func (c *Config) OutgoingTLSConfig() (*tls.Config, error) { + if !c.VerifyOutgoing { + return nil, nil + } // Create the tlsConfig tlsConfig := &tls.Config{ - ServerName: c.ServerName, RootCAs: x509.NewCertPool(), - InsecureSkipVerify: !c.VerifyOutgoing, + InsecureSkipVerify: true, } - if tlsConfig.ServerName == "" { - tlsConfig.ServerName = c.NodeName + if c.ServerName != "" { + tlsConfig.ServerName = c.ServerName + tlsConfig.InsecureSkipVerify = false } // Ensure we have a CA if VerifyOutgoing is set @@ -206,6 +211,59 @@ func (c *Config) OutgoingTLSConfig() (*tls.Config, error) { return tlsConfig, nil } +// Wrap a net.Conn into a tls connection, performing any additional +// verification as needed. +// +// As of go 1.3, crypto/tls only supports either doing no certificate +// verification, or doing full verification including of the peer's +// DNS name. For consul, we want to validate that the certificate is +// signed by a known CA, but because consul doesn't use DNS names for +// node names, we don't verify the certificate DNS names. Since go 1.3 +// no longer supports this mode of operation, we have to do it +// manually. +func wrapTLSClient(conn net.Conn, tlsConfig *tls.Config) (net.Conn, error) { + var err error + var tlsConn *tls.Conn + + tlsConn = tls.Client(conn, tlsConfig) + + // If crypto/tls is doing verification, there's no need to do + // our own. + if tlsConfig.InsecureSkipVerify == false { + return tlsConn, nil + } + + if err = tlsConn.Handshake(); err != nil { + tlsConn.Close() + return nil, err + } + + // The following is lightly-modified from the doFullHandshake + // method in crypto/tls's handshake_client.go. 
+ opts := x509.VerifyOptions{ + Roots: tlsConfig.RootCAs, + CurrentTime: time.Now(), + DNSName: "", + Intermediates: x509.NewCertPool(), + } + + certs := tlsConn.ConnectionState().PeerCertificates + for i, cert := range certs { + if i == 0 { + continue + } + opts.Intermediates.AddCert(cert) + } + + _, err = certs[0].Verify(opts) + if err != nil { + tlsConn.Close() + return nil, err + } + + return tlsConn, err +} + // IncomingTLSConfig generates a TLS configuration for incoming requests func (c *Config) IncomingTLSConfig() (*tls.Config, error) { // Create the tlsConfig diff --git a/consul/config_test.go b/consul/config_test.go index c6081603ee..dc20cf942f 100644 --- a/consul/config_test.go +++ b/consul/config_test.go @@ -78,14 +78,8 @@ func TestConfig_OutgoingTLS_OnlyCA(t *testing.T) { if err != nil { t.Fatalf("err: %v", err) } - if tls == nil { - t.Fatalf("expected config") - } - if len(tls.RootCAs.Subjects()) != 1 { - t.Fatalf("expect root cert") - } - if !tls.InsecureSkipVerify { - t.Fatalf("expect to skip verification") + if tls != nil { + t.Fatalf("expected no config") } } @@ -104,8 +98,35 @@ func TestConfig_OutgoingTLS_VerifyOutgoing(t *testing.T) { if len(tls.RootCAs.Subjects()) != 1 { t.Fatalf("expect root cert") } + if tls.ServerName != "" { + t.Fatalf("expect no server name verification") + } + if !tls.InsecureSkipVerify { + t.Fatalf("should skip built-in verification") + } +} + +func TestConfig_OutgoingTLS_ServerName(t *testing.T) { + conf := &Config{ + VerifyOutgoing: true, + CAFile: "../test/ca/root.cer", + ServerName: "consul.example.com", + } + tls, err := conf.OutgoingTLSConfig() + if err != nil { + t.Fatalf("err: %v", err) + } + if tls == nil { + t.Fatalf("expected config") + } + if len(tls.RootCAs.Subjects()) != 1 { + t.Fatalf("expect root cert") + } + if tls.ServerName != "consul.example.com" { + t.Fatalf("expect server name") + } if tls.InsecureSkipVerify { - t.Fatalf("should not skip verification") + t.Fatalf("should not skip built-in 
verification") } } @@ -126,8 +147,8 @@ func TestConfig_OutgoingTLS_WithKeyPair(t *testing.T) { if len(tls.RootCAs.Subjects()) != 1 { t.Fatalf("expect root cert") } - if tls.InsecureSkipVerify { - t.Fatalf("should not skip verification") + if !tls.InsecureSkipVerify { + t.Fatalf("should skip verification") } if len(tls.Certificates) != 1 { t.Fatalf("expected client cert") diff --git a/consul/pool.go b/consul/pool.go index 804a900f5a..3b7e80c297 100644 --- a/consul/pool.go +++ b/consul/pool.go @@ -221,7 +221,11 @@ func (p *ConnPool) getNewConn(addr net.Addr, version int) (*Conn, error) { } // Wrap the connection in a TLS client - conn = tls.Client(conn, p.tlsConfig) + conn, err = wrapTLSClient(conn, p.tlsConfig) + if err != nil { + conn.Close() + return nil, err + } } // Switch the multiplexing based on version diff --git a/consul/raft_rpc.go b/consul/raft_rpc.go index 1221ce06f2..1024cd9878 100644 --- a/consul/raft_rpc.go +++ b/consul/raft_rpc.go @@ -94,7 +94,10 @@ func (l *RaftLayer) Dial(address string, timeout time.Duration) (net.Conn, error } // Wrap the connection in a TLS client - conn = tls.Client(conn, l.tlsConfig) + conn, err = wrapTLSClient(conn, l.tlsConfig) + if err != nil { + return nil, err + } } // Write the Raft byte to set the mode diff --git a/consul/server.go b/consul/server.go index e7dd195f34..8af3fd4ef8 100644 --- a/consul/server.go +++ b/consul/server.go @@ -145,12 +145,9 @@ func NewServer(config *Config) (*Server, error) { } // Create the tlsConfig for outgoing connections - var tlsConfig *tls.Config - var err error - if config.VerifyOutgoing { - if tlsConfig, err = config.OutgoingTLSConfig(); err != nil { - return nil, err - } + tlsConfig, err := config.OutgoingTLSConfig() + if err != nil { + return nil, err } // Get the incoming tls config From 12a7f765b61c7c9b62533a29a205cbe2f7af3b72 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Sun, 29 Jun 2014 18:11:32 -0700 Subject: [PATCH 13/43] Add some basic smoke tests for wrapTLSclient. 
Check the success case, and check that we reject a self-signed certificate. --- consul/config.go | 4 +- consul/config_test.go | 96 ++++++++++++++++++++++++++++++++++ test/key/ssl-cert-snakeoil.key | 28 ++++++++++ test/key/ssl-cert-snakeoil.pem | 17 ++++++ 4 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 test/key/ssl-cert-snakeoil.key create mode 100644 test/key/ssl-cert-snakeoil.pem diff --git a/consul/config.go b/consul/config.go index 4904880498..9aa0c86234 100644 --- a/consul/config.go +++ b/consul/config.go @@ -211,8 +211,8 @@ func (c *Config) OutgoingTLSConfig() (*tls.Config, error) { return tlsConfig, nil } -// Wrap a net.Conn into a tls connection, performing any additional -// verification as needed. +// Wrap a net.Conn into a client tls connection, performing any +// additional verification as needed. // // As of go 1.3, crypto/tls only supports either doing no certificate // verification, or doing full verification including of the peer's diff --git a/consul/config_test.go b/consul/config_test.go index dc20cf942f..1007ffba77 100644 --- a/consul/config_test.go +++ b/consul/config_test.go @@ -3,6 +3,9 @@ package consul import ( "crypto/tls" "crypto/x509" + "io" + "io/ioutil" + "net" "testing" ) @@ -155,6 +158,99 @@ func TestConfig_OutgoingTLS_WithKeyPair(t *testing.T) { } } +func startTLSServer(config *Config) (net.Conn, chan error) { + errc := make(chan error, 1) + + tlsConfigServer, err := config.IncomingTLSConfig() + if err != nil { + errc <- err + return nil, errc + } + + client, server := net.Pipe() + go func() { + tlsServer := tls.Server(server, tlsConfigServer) + if err := tlsServer.Handshake(); err != nil { + errc <- err + } + close(errc) + // Because net.Pipe() is unbuffered, if both sides + // Close() simultaneously, we will deadlock as they + // both send an alert and then block. 
So we make the + // server read any data from the client until error or + // EOF, which will allow the client to Close(), and + // *then* we Close() the server. + io.Copy(ioutil.Discard, tlsServer) + tlsServer.Close() + }() + return client, errc +} + +func TestConfig_wrapTLS_OK(t *testing.T) { + config := &Config{ + CAFile: "../test/ca/root.cer", + CertFile: "../test/key/ourdomain.cer", + KeyFile: "../test/key/ourdomain.key", + VerifyOutgoing: true, + } + + client, errc := startTLSServer(config) + if client == nil { + t.Fatalf("startTLSServer err: %v", <-errc) + } + + clientConfig, err := config.OutgoingTLSConfig() + if err != nil { + t.Fatalf("OutgoingTLSConfig err: %v", err) + } + + tlsClient, err := wrapTLSClient(client, clientConfig) + if err != nil { + t.Fatalf("wrapTLS err: %v", err) + } else { + tlsClient.Close() + } + err = <-errc + if err != nil { + t.Fatalf("server: %v", err) + } +} + +func TestConfig_wrapTLS_BadCert(t *testing.T) { + serverConfig := &Config{ + CertFile: "../test/key/ssl-cert-snakeoil.pem", + KeyFile: "../test/key/ssl-cert-snakeoil.key", + } + + client, errc := startTLSServer(serverConfig) + if client == nil { + t.Fatalf("startTLSServer err: %v", <-errc) + } + + clientConfig := &Config{ + CAFile: "../test/ca/root.cer", + VerifyOutgoing: true, + } + + clientTLSConfig, err := clientConfig.OutgoingTLSConfig() + if err != nil { + t.Fatalf("OutgoingTLSConfig err: %v", err) + } + + tlsClient, err := wrapTLSClient(client, clientTLSConfig) + if err == nil { + t.Fatalf("wrapTLS no err") + } + if tlsClient != nil { + t.Fatalf("returned a client") + } + + err = <-errc + if err != nil { + t.Fatalf("server: %v", err) + } +} + func TestConfig_IncomingTLS(t *testing.T) { conf := &Config{ VerifyIncoming: true, diff --git a/test/key/ssl-cert-snakeoil.key b/test/key/ssl-cert-snakeoil.key new file mode 100644 index 0000000000..22cc4acb14 --- /dev/null +++ b/test/key/ssl-cert-snakeoil.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- 
+MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDYVw5skn/3Ka72 +32ZaCrKtRVoQzan3tghq41KpQe3yZxIZbKy7sbwfdXnXVSwTAbq/3BYi9rya2t/v +W95yZh6JgfrLBvWl9Jo1EttZIxDhzCXGP+MPWm2KdNtHr84JznJbdxRpR0Jb4ykK +2d9dXbLJvCw8eEDFgOVGrj60USMir46sZFRvGWlMi+yHSOE+WQXaU40Dr0ZJqNvd +RNO9BtqpLaecZQaYTvlkyVdhjUE3+gQ0zEAQqpLcWi+zB5/IyR2+KwxDT3vAJumd +G7rIaGatPE8k0Ahb+zMKFFGYCoQ3sjbAbrQmrVtH4SU6ggl+CxpVdxshrK1W05Ms +WAiPw81/AgMBAAECggEAKjDIKlpjxGMHsTOeNV8yu2H0D6TcSefhOl885q9p5UU+ +nWC5Sx19b7EsYtdEcix7LCGS25y86YJX+8kx16OcvvpvW5ru2z+Zt1IHHxocl7yF +fWVGNd9Pz5m8jf12NClj2fyeKW3xPhROE8Srr/yu+nLNObnF//6EOEWRCv9r176C ++dzYvYVNPP48Ug7NpjQB94CBprtJyqvuoXvBPtpARXazVniYEhnzG1Gaj1TiCII5 ++emaMjKcWIEJ5stbBb3lUtqgm8bRNb/qcxoFfqTzHP+hbum9hbRz0KEIlAkm7uAv +S0TlyLuaj+gPQ+LwNX8EhGKUdlK/VM5bj2kq/tg3AQKBgQD/+A8ruHNa5nKGKNzP +dp+hXiL2sSzefMjDa2+sRJ0yftIMqYRfCJwzYumjfyycfCsu1LHainlQjSO6Kkgc +c0xVxnahWyPCQiqZuo9lLx4EVXCdRqWRg+pbyQhTSz90hfWEKD7XWsI8uRkOEnW8 +36FiyovGDFxl0esaKrFNSFdmgQKBgQDYXcSIRJk41f7vL6FVmchpUnVYoD75k9YT +FqEplNMw6gXcqbC2aNH5wj7EJlRboyVpjXV4N0d2Cz6AwREJpr/rYpq68AixXmVs +kTKwevoHm/tln7CN+CyIEy6KXdLp4KoWLFfSG6tHWRwIGFxWEGrrIZS6Eznu4GPe +V2yOnMkz/wKBgC6nXtSALP5PbGZJgl2J6HR3/PVru5rdsZX0ugjzBJfUh6JpL0hH +AHlZOO5k2pO3CgPiHnyPqqbk4rMmy7frx+kGYE7ulqjseGlGmKY/nT/69qij3L+W +BJwwGwVbfLhXRjWNRE7qKub4cbmf4bfIJtkjw7AYRqsERM6jI2fLnKqBAoGAUBzY +CkSsHxlNXa7bI+DfDfBUNs6OwsZ0e3jjj4vlbrUYGo5SOhgxtzKvHt26Wnvb/Gs+ +VZbSROkA6ZeTAWnWogdOl20NKu9yynIwvJusPGkK+qPYMZj0lCXWE7GNyL9A+xjM +I6XPE4nxESZD+jH2BL3YXdWEm+hF0iu4rE1tSm0CgYEAxssvvX7qcfTmxsp1YSHJ +H5j9ifkakci5W2VbCbdMtdOlgIlCFr2JYguaL98jx7WIJ4iH54ue/fbOdlkPCOsz +YGU4TceSRHeEJ7F6c67NOXm8j2TquAW2uYH87w07g2PIUwl/pp439qoDiThA6jEX +2ztyXgNUi7poqehPUoQuvC0= +-----END PRIVATE KEY----- diff --git a/test/key/ssl-cert-snakeoil.pem b/test/key/ssl-cert-snakeoil.pem new file mode 100644 index 0000000000..b8ad2c8a6a --- /dev/null +++ b/test/key/ssl-cert-snakeoil.pem @@ -0,0 +1,17 @@ +-----BEGIN CERTIFICATE----- +MIICsjCCAZqgAwIBAgIJAMi7aUCplU3VMA0GCSqGSIb3DQEBBQUAMBExDzANBgNV 
+BAMTBnVidW50dTAeFw0xMjEyMDIwNDQ3MzBaFw0yMjExMzAwNDQ3MzBaMBExDzAN +BgNVBAMTBnVidW50dTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBANhX +DmySf/cprvbfZloKsq1FWhDNqfe2CGrjUqlB7fJnEhlsrLuxvB91eddVLBMBur/c +FiL2vJra3+9b3nJmHomB+ssG9aX0mjUS21kjEOHMJcY/4w9abYp020evzgnOclt3 +FGlHQlvjKQrZ311dssm8LDx4QMWA5UauPrRRIyKvjqxkVG8ZaUyL7IdI4T5ZBdpT +jQOvRkmo291E070G2qktp5xlBphO+WTJV2GNQTf6BDTMQBCqktxaL7MHn8jJHb4r +DENPe8Am6Z0bushoZq08TyTQCFv7MwoUUZgKhDeyNsButCatW0fhJTqCCX4LGlV3 +GyGsrVbTkyxYCI/DzX8CAwEAAaMNMAswCQYDVR0TBAIwADANBgkqhkiG9w0BAQUF +AAOCAQEAQaS5yAih5NBV2edX1wkIQfAUElqmzoXvxsozDYy+P+S5tJeFXDSqzTAy +qkd/6qjkBdaARfKUJZeT/jRjqxoNtE9SR4PMOnD4zrqD26ujgZRVtPImbmVxCnMI +1B9LwvhpDHZuPGN5bPp3o+iDYea8zkS3Y31Ic889KSwKBDb1LlNogOdved+2DGd1 +yCxEErImbl4B0+QPrRk2bWbDfKhDfJ2FV+9kWIoEuCQBpr2tj1E5zvTadOVm5P2M +u7kjGl4w0GIAONiMC9l2TwMmPuG1jpM/WjQkG0sTKOCl7xQKgXBNJ78Wm2bfGtgb +shr/PNbS/EyISlUa07+zJtiRnr/EiQ== +-----END CERTIFICATE----- From cb8fcf613de5e944c6393a9e9d5cdde911705577 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Sun, 29 Jun 2014 18:22:44 -0700 Subject: [PATCH 14/43] Fix a Makefile typo. The broken .PHONY declaration is breaking Travis: 'make deps' is now a no-op, because of the new 'deps' directory. 
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0e4a546264..8d8e2f078a 100644 --- a/Makefile +++ b/Makefile @@ -30,4 +30,4 @@ web: web-push: ./scripts/website_push.sh -.PNONY: all cov deps integ test web web-push +.PHONY: all cov deps integ test web web-push From d313349807a06a29b71418557533c84003a41138 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 1 Jul 2014 15:02:26 -0700 Subject: [PATCH 15/43] Updating documentation for new bootstrap method --- command/agent/command.go | 2 +- .../source/docs/agent/basics.html.markdown | 3 +- .../source/docs/agent/options.html.markdown | 17 ++-- .../docs/guides/bootstrapping.html.markdown | 84 ++++++++----------- .../source/docs/guides/outage.html.markdown | 2 +- .../source/docs/guides/servers.html.markdown | 3 +- .../intro/getting-started/agent.html.markdown | 13 ++- .../intro/getting-started/join.html.markdown | 9 +- .../getting-started/services.html.markdown | 2 +- 9 files changed, 64 insertions(+), 71 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index f1f7b0d0bf..6410383eaf 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -537,6 +537,7 @@ Options: -advertise=addr Sets the advertise address to use -bootstrap Sets server to bootstrap mode -bind=0.0.0.0 Sets the bind address for cluster communication + -bootstrap-expect=0 Sets server to expect bootstrap mode. -client=127.0.0.1 Sets the address to bind for client access. This includes RPC, DNS and HTTP -config-file=foo Path to a JSON file to read configuration from. @@ -547,7 +548,6 @@ Options: order. -data-dir=path Path to a data directory to store agent state -dc=east-aws Datacenter of the agent - -expect=0 Sets server to expect bootstrap mode. -join=1.2.3.4 Address of an agent to join at start time. Can be specified multiple times. -log-level=info Log level of the agent. 
diff --git a/website/source/docs/agent/basics.html.markdown b/website/source/docs/agent/basics.html.markdown index ba05f8a196..a63d90a04d 100644 --- a/website/source/docs/agent/basics.html.markdown +++ b/website/source/docs/agent/basics.html.markdown @@ -57,8 +57,7 @@ There are several important components that `consul agent` outputs: * **Server**: This shows if the agent is running in the server or client mode. Server nodes have the extra burden of participating in the consensus quorum, storing cluster state, and handling queries. Additionally, a server may be - in "bootstrap" mode. The first server must be in this mode to allow additional - servers to join the cluster. Multiple servers cannot be in bootstrap mode, + in "bootstrap" mode. Multiple servers cannot be in bootstrap mode, otherwise the cluster state will be inconsistent. * **Client Addr**: This is the address used for client interfaces to the agent. diff --git a/website/source/docs/agent/options.html.markdown b/website/source/docs/agent/options.html.markdown index c134b73a51..bfafab5e8f 100644 --- a/website/source/docs/agent/options.html.markdown +++ b/website/source/docs/agent/options.html.markdown @@ -35,11 +35,16 @@ The options below are all specified on the command-line. as other nodes will treat the non-routability as a failure. * `-bootstrap` - This flag is used to control if a server is in "bootstrap" mode. It is important that - no more than one server *per* datacenter be running in this mode. The initial server **must** be in bootstrap - mode. Technically, a server in bootstrap mode is allowed to self-elect as the Raft leader. It is important - that only a single node is in this mode, because otherwise consistency cannot be guaranteed if multiple - nodes are able to self-elect. Once there are multiple servers in a datacenter, it is generally a good idea - to disable bootstrap mode on all of them. + no more than one server *per* datacenter be running in this mode. 
Technically, a server in bootstrap mode + is allowed to self-elect as the Raft leader. It is important that only a single node is in this mode, + because otherwise consistency cannot be guaranteed if multiple nodes are able to self-elect. + It is not recommended to use this flag after a cluster has been bootstrapped. + +* `-bootstrap-expect` - This flag provides the number of expected servers in the datacenter. + Either this value should not be provided, or the value must agree with other servers in + the cluster. When provided, Consul waits until the specified number of servers are + available, and then bootstraps the cluster. This allows an initial leader to be elected + automatically. This cannot be used in conjunction with the `-bootstrap` flag. * `-bind` - The address that should be bound to for internal cluster communications. This is an IP address that should be reachable by all other nodes in the cluster. @@ -148,6 +153,8 @@ definitions support being updated during a reload. * `bootstrap` - Equivalent to the `-bootstrap` command-line flag. +* `bootstrap_expect` - Equivalent to the `-bootstrap-expect` command-line flag. + * `bind_addr` - Equivalent to the `-bind` command-line flag. * `client_addr` - Equivalent to the `-client` command-line flag. diff --git a/website/source/docs/guides/bootstrapping.html.markdown b/website/source/docs/guides/bootstrapping.html.markdown index 6339e59ccf..472a949f4c 100644 --- a/website/source/docs/guides/bootstrapping.html.markdown +++ b/website/source/docs/guides/bootstrapping.html.markdown @@ -6,74 +6,62 @@ sidebar_current: "docs-guides-bootstrapping" # Bootstrapping a Datacenter -When deploying Consul to a datacenter for the first time, there is an initial bootstrapping that -must be done. Generally, the first nodes that are started are the server nodes. Remember that an -agent can run in both client and server mode. 
Server nodes are responsible for running +Before a Consul cluster can begin to service requests, it is necessary for a server node to +be elected leader. For this reason, the first nodes that are started are generally the server nodes. +Remember that an agent can run in both client and server mode. Server nodes are responsible for running the [consensus protocol](/docs/internals/consensus.html), and storing the cluster state. The client nodes are mostly stateless and rely on the server nodes, so they can be started easily. -The first server that is deployed in a new datacenter must provide the `-bootstrap` [configuration -option](/docs/agent/options.html). This option allows the server to assert leadership of the cluster -without agreement from any other server. This is necessary because at this point, there are no other -servers running in the datacenter! Lets call this first server `Node A`. When starting `Node A` something -like the following will be logged: +The recommended way to bootstrap is to use the `-bootstrap-expect` [configuration +option](/docs/agent/options.html). This option informs Consul of the expected number of +server nodes, and automatically bootstraps when that many servers are available. To prevent +inconsistencies and split-brain situations, all servers should specify the same value for `-bootstrap-expect` +or specify no value at all. Any server that does not specify a value will not attempt to +bootstrap the cluster. - 2014/02/22 19:23:32 [INFO] consul: cluster leadership acquired +There is a [deployment table](/docs/internals/consensus.html#toc_3) that covers various options, +but it is recommended to have 3 or 5 total servers per data center. A single server deployment is _**highly**_ +discouraged as data loss is inevitable in a failure scenario. -Once `Node A` is running, we can start the next set of servers. 
There is a [deployment table](/docs/internals/consensus.html#toc_3) -that covers various options, but it is recommended to have 3 or 5 total servers per data center. -A single server deployment is _**highly**_ discouraged as data loss is inevitable in a failure scenario. -We start the next servers **without** specifying `-bootstrap`. This is critical, since only one server -should ever be running in bootstrap mode*. Once `Node B` and `Node C` are started, you should see a -message to the effect of: +Suppose we are starting a 3 server cluster. We can start `Node A`, `Node B` and `Node C`, each providing +the `-bootstrap-expect 3` flag. Once the nodes are started, you should see a message to the effect of: [WARN] raft: EnableSingleNode disabled, and no known peers. Aborting election. -This indicates that the node is not in bootstrap mode, and it will not elect itself as leader. -We can now join these machines together. Since a join operation is symmetric it does not matter -which node initiates it. From `Node B` and `Node C` you can do the following: +This indicates that the nodes are expecting 2 peers, but none are known yet. To prevent a split-brain, the servers +will not elect themselves leader. We can now join these machines together. Since a join operation +is symmetric it does not matter which node initiates it. From any node you can do the following: - $ consul join - Successfully joined cluster by contacting 1 nodes. + $ consul join + Successfully joined cluster by contacting 3 nodes. -Alternatively, from `Node A` you can do the following: +Once the join is successful, one of the nodes will output something like: - $ consul join - Successfully joined cluster by contacting 2 nodes. + [INFO] consul: adding server foo (Addr: 127.0.0.2:8300) (DC: dc1) + [INFO] consul: adding server bar (Addr: 127.0.0.1:8300) (DC: dc1) + [INFO] consul: Attempting bootstrap with nodes: [127.0.0.3:8300 127.0.0.2:8300 127.0.0.1:8300] + ... 
+ [INFO] consul: cluster leadership acquired -Once the join is successful, `Node A` should output something like: - - [INFO] raft: Added peer 127.0.0.2:8300, starting replication - .... - [INFO] raft: Added peer 127.0.0.3:8300, starting replication - -Another good check is to run the `consul info` command. When run on `Node A`, you can +As a sanity check, the `consul info` command is a useful tool. It can be used to verify `raft.num_peers` is now 2, and you can view the latest log index under `raft.last_log_index`. -When running `consul info` on `Node B` and `Node C` you should see `raft.last_log_index` +When running `consul info` on the followers, you should see `raft.last_log_index` converge to the same value as the leader begins replication. That value represents the last log entry that has been stored on disk. -This indicates that `Node B` and `Node C` have been added as peers. At this point, -all three nodes see each other as peers, `Node A` is the leader, and replication -should be working. - -The final step is to remove the `-bootstrap` flag. This is important since we don't -want the node to be able to make unilateral decisions in the case of a failure of the -other two nodes. To do this, we send a `SIGINT` to `Node A` to allow it to perform -a graceful leave. Then we remove the `-bootstrap` flag and restart the node. The node -will need to rejoin the cluster, since the graceful exit leaves the cluster. Any transactions -that took place while `Node A` was offline will be replicated and the node will catch up. - Now that the servers are all started and replicating to each other, all the remaining clients can be joined. Clients are much easier, as they can be started and perform a `join` against any existing node. All nodes participate in a gossip protocol to perform basic discovery, so clients will automatically find the servers and register themselves. -
-* If you accidentally start another server with the flag set, do not fret. -Shutdown the node, and remove the `raft/` folder from the data directory. This will -remove the bad state caused by being in `-bootstrap` mode. Then restart the -node and join the cluster normally. -
+It should be noted that it is not strictly necessary to start the server nodes +before the clients; however, most operations will fail until the servers are available. + +## Manual Bootstrapping + +In versions of Consul previous to 0.4, bootstrapping was a more manual process. +For a guide on using the `-bootstrap` flag directly, see the [manual bootstrapping guide](/docs/guides/manual-bootstrap.html). + +This is not recommended, as it is more error prone than automatic bootstrapping. diff --git a/website/source/docs/guides/outage.html.markdown b/website/source/docs/guides/outage.html.markdown index 13f4379179..893cd66949 100644 --- a/website/source/docs/guides/outage.html.markdown +++ b/website/source/docs/guides/outage.html.markdown @@ -18,7 +18,7 @@ add or remove a server see this page. If you had only a single server and it has failed, simply restart it. -Note that a single server configuration requires the `-bootstrap` flag. +Note that a single server configuration requires the `-bootstrap` or `-bootstrap-expect 1` flag. If that server cannot be recovered, you need to bring up a new server. See the [bootstrapping guide](/docs/guides/bootstrapping.html). Data loss is inevitable, since data was not replicated to any other servers. This diff --git a/website/source/docs/guides/servers.html.markdown b/website/source/docs/guides/servers.html.markdown index 64b4583f04..9cf535bed5 100644 --- a/website/source/docs/guides/servers.html.markdown +++ b/website/source/docs/guides/servers.html.markdown @@ -18,8 +18,7 @@ to first add the new nodes and then remove the old nodes. ## Adding New Servers -Adding new servers is generally straightforward. After the initial server, no further -servers should ever be started with the `-bootstrap` flag. Instead, simply start the new +Adding new servers is generally straightforward. Simply start the new server with the `-server` flag. 
At this point, the server will not be a member of any cluster, and should emit something like: diff --git a/website/source/intro/getting-started/agent.html.markdown b/website/source/intro/getting-started/agent.html.markdown index d9709eb161..1a75c75448 100644 --- a/website/source/intro/getting-started/agent.html.markdown +++ b/website/source/intro/getting-started/agent.html.markdown @@ -20,7 +20,8 @@ will be part of the cluster. For simplicity, we'll run a single Consul agent in server mode right now: ``` -$ consul agent -server -bootstrap -data-dir /tmp/consul +$ consul agent -server -bootstrap-expect 1 -data-dir /tmp/consul +==> WARNING: BootstrapExpect Mode is specified as 1; this is the same as Bootstrap mode. ==> WARNING: Bootstrap mode enabled! Do not enable unless necessary ==> WARNING: It is highly recommended to set GOMAXPROCS higher than 1 ==> Starting Consul agent... @@ -67,15 +68,13 @@ joining clusters in the next section. ``` $ consul members -Armons-MacBook-Air 10.1.10.38:8301 alive role=consul,dc=dc1,vsn=1,vsn_min=1,vsn_max=1,port=8300,bootstrap=1 +Node Address Status Type Build Protocol +Armons-MacBook-Air 10.1.10.38:8301 alive server 0.3.0 2 ``` The output shows our own node, the address it is running on, its -health state, and some metadata associated with the node. Some important -metadata keys to recognize are the `role` and `dc` keys. These tell you -the service name and the datacenter that member is within. These can be -used to lookup nodes and services using the DNS interface, which is covered -shortly. +health state, its role in the cluster, as well as some versioning information. +Additional metadata can be viewed by providing the `-detailed` flag. The output from the `members` command is generated based on the [gossip protocol](/docs/internals/gossip.html) and is eventually consistent. 
diff --git a/website/source/intro/getting-started/join.html.markdown b/website/source/intro/getting-started/join.html.markdown index e369b91022..53bc44fa62 100644 --- a/website/source/intro/getting-started/join.html.markdown +++ b/website/source/intro/getting-started/join.html.markdown @@ -34,7 +34,7 @@ will act as our server in this cluster. We're still not making a cluster of servers. ``` -$ consul agent -server -bootstrap -data-dir /tmp/consul \ +$ consul agent -server -bootstrap-expect 1 -data-dir /tmp/consul \ -node=agent-one -bind=172.20.20.10 ... ``` @@ -70,9 +70,10 @@ run `consul members` against each agent, you'll see that both agents now know about each other: ``` -$ consul members -agent-one 172.20.20.10:8301 alive role=consul,dc=dc1,vsn=1,vsn_min=1,vsn_max=1,port=8300,bootstrap=1 -agent-two 172.20.20.11:8301 alive role=node,dc=dc1,vsn=1,vsn_min=1,vsn_max=1 +$ consul members -detailed +Node Address Status Tags +agent-one 172.20.20.10:8301 alive role=consul,dc=dc1,vsn=2,vsn_min=1,vsn_max=2,port=8300,bootstrap=1 +agent-two 172.20.20.11:8301 alive role=node,dc=dc1,vsn=2,vsn_min=1,vsn_max=2 ```
diff --git a/website/source/intro/getting-started/services.html.markdown b/website/source/intro/getting-started/services.html.markdown index f67cf31d95..8d1329cdb2 100644 --- a/website/source/intro/getting-started/services.html.markdown +++ b/website/source/intro/getting-started/services.html.markdown @@ -43,7 +43,7 @@ $ echo '{"service": {"name": "web", "tags": ["rails"], "port": 80}}' \ Now, restart the agent we're running, providing the configuration directory: ``` -$ consul agent -server -bootstrap -data-dir /tmp/consul -config-dir /etc/consul.d +$ consul agent -server -bootstrap-expect 1 -data-dir /tmp/consul -config-dir /etc/consul.d ==> Starting Consul agent... ... [INFO] agent: Synced service 'web' From bc4b5ccd48029855d562aca69d47be808919600e Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 1 Jul 2014 15:02:42 -0700 Subject: [PATCH 16/43] website: Keep old bootstrapping docs --- .../guides/manual-bootstrap.html.markdown | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 website/source/docs/guides/manual-bootstrap.html.markdown diff --git a/website/source/docs/guides/manual-bootstrap.html.markdown b/website/source/docs/guides/manual-bootstrap.html.markdown new file mode 100644 index 0000000000..f1375031ee --- /dev/null +++ b/website/source/docs/guides/manual-bootstrap.html.markdown @@ -0,0 +1,83 @@ +--- +layout: "docs" +page_title: "Manual Bootstrapping" +sidebar_current: "docs-guides-bootstrapping" +--- + +# Manually Bootstrapping a Datacenter + +When deploying Consul to a datacenter for the first time, there is an initial bootstrapping that +must be done. As of Consul 0.4, an [automatic bootstrapping](/docs/guides/bootstrapping.html) is +available and is the recommended approach. However, older versions only support a manual bootstrap +that is documented here. + +Generally, the first nodes that are started are the server nodes. Remember that an +agent can run in both client and server mode. 
Server nodes are responsible for running +the [consensus protocol](/docs/internals/consensus.html), and storing the cluster state. +The client nodes are mostly stateless and rely on the server nodes, so they can be started easily. + +Manual bootstrapping requires that the first server that is deployed in a new datacenter provide +the `-bootstrap` [configuration option](/docs/agent/options.html). This option allows the server to +assert leadership of the cluster without agreement from any other server. This is necessary because +at this point, there are no other servers running in the datacenter! Let's call this first server `Node A`. +When starting `Node A` something like the following will be logged: + + 2014/02/22 19:23:32 [INFO] consul: cluster leadership acquired + +Once `Node A` is running, we can start the next set of servers. There is a [deployment table](/docs/internals/consensus.html#toc_3) +that covers various options, but it is recommended to have 3 or 5 total servers per data center. +A single server deployment is _**highly**_ discouraged as data loss is inevitable in a failure scenario. +We start the next servers **without** specifying `-bootstrap`. This is critical, since only one server +should ever be running in bootstrap mode*. Once `Node B` and `Node C` are started, you should see a +message to the effect of: + + [WARN] raft: EnableSingleNode disabled, and no known peers. Aborting election. + +This indicates that the node is not in bootstrap mode, and it will not elect itself as leader. +We can now join these machines together. Since a join operation is symmetric it does not matter +which node initiates it. From `Node B` and `Node C` you can do the following: + + $ consul join + Successfully joined cluster by contacting 1 nodes. + +Alternatively, from `Node A` you can do the following: + + $ consul join + Successfully joined cluster by contacting 2 nodes. 
+ +Once the join is successful, `Node A` should output something like: + + [INFO] raft: Added peer 127.0.0.2:8300, starting replication + .... + [INFO] raft: Added peer 127.0.0.3:8300, starting replication + +As a sanity check, the `consul info` command is a useful tool. It can be used to +verify `raft.num_peers` is now 2, and you can view the latest log index under `raft.last_log_index`. +When running `consul info` on the followers, you should see `raft.last_log_index` +converge to the same value as the leader begins replication. That value represents the last +log entry that has been stored on disk. + +This indicates that `Node B` and `Node C` have been added as peers. At this point, +all three nodes see each other as peers, `Node A` is the leader, and replication +should be working. + +The final step is to remove the `-bootstrap` flag. This is important since we don't +want the node to be able to make unilateral decisions in the case of a failure of the +other two nodes. To do this, we send a `SIGINT` to `Node A` to allow it to perform +a graceful leave. Then we remove the `-bootstrap` flag and restart the node. The node +will need to rejoin the cluster, since the graceful exit leaves the cluster. Any transactions +that took place while `Node A` was offline will be replicated and the node will catch up. + +Now that the servers are all started and replicating to each other, all the remaining +clients can be joined. Clients are much easier, as they can be started and perform +a `join` against any existing node. All nodes participate in a gossip protocol to +perform basic discovery, so clients will automatically find the servers and register +themselves. + +
+* If you accidentally start another server with the flag set, do not fret. +Shut down the node, and remove the `raft/` folder from the data directory. This will +remove the bad state caused by being in `-bootstrap` mode. Then restart the +node and join the cluster normally. +
+ From 3d1e09c263bbd2c259b4f19e1e44c37f7c641b99 Mon Sep 17 00:00:00 2001 From: Ben Scofield Date: Wed, 2 Jul 2014 21:21:37 +0200 Subject: [PATCH 17/43] Improve clarity around changing the `-client` param This is a small change that (IMO) makes more explicit the effect of changing `-client` -- namely, that you'll have to provide `-rpc-addr` to every other consul command you run. --- website/source/docs/agent/basics.html.markdown | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/source/docs/agent/basics.html.markdown b/website/source/docs/agent/basics.html.markdown index a63d90a04d..9061c5d208 100644 --- a/website/source/docs/agent/basics.html.markdown +++ b/website/source/docs/agent/basics.html.markdown @@ -65,9 +65,9 @@ There are several important components that `consul agent` outputs: address is used for other `consul` commands. Other Consul commands such as `consul members` connect to a running agent and use RPC to query and control the agent. By default, this binds only to localhost. If you - change this address or port, you'll have to specify an `-rpc-addr` to commands - such as `consul members` so they know how to talk to the agent. This is also - the address other applications can use over [RPC to control Consul](/docs/agent/rpc.html). + change this address or port, you'll have to specify an `-rpc-addr` whenever + you run commands such as `consul members` so they know how to talk to the + agent. This is also the address other applications can use over [RPC to control Consul](/docs/agent/rpc.html). * **Cluster Addr**: This is the address and ports used for communication between Consul agents in a cluster. Every Consul agent in a cluster does not have to From 135c4095731e3da9d5d54fd1f1b502ac262beec4 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Sat, 5 Jul 2014 09:49:10 -0700 Subject: [PATCH 18/43] agent: Fixing passing filter. 
Fixes #241 --- command/agent/health_endpoint.go | 2 ++ command/agent/health_endpoint_test.go | 37 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/command/agent/health_endpoint.go b/command/agent/health_endpoint.go index 8462e0a4b2..3ee02ac5e6 100644 --- a/command/agent/health_endpoint.go +++ b/command/agent/health_endpoint.go @@ -117,6 +117,7 @@ func (s *HTTPServer) HealthServiceNodes(resp http.ResponseWriter, req *http.Requ // filterNonPassing is used to filter out any nodes that have check that are not passing func filterNonPassing(nodes structs.CheckServiceNodes) structs.CheckServiceNodes { n := len(nodes) +OUTER: for i := 0; i < n; i++ { node := nodes[i] for _, check := range node.Checks { @@ -124,6 +125,7 @@ func filterNonPassing(nodes structs.CheckServiceNodes) structs.CheckServiceNodes nodes[i], nodes[n-1] = nodes[n-1], structs.CheckServiceNode{} n-- i-- + continue OUTER } } } diff --git a/command/agent/health_endpoint_test.go b/command/agent/health_endpoint_test.go index c1f75a2698..40ceedb0f3 100644 --- a/command/agent/health_endpoint_test.go +++ b/command/agent/health_endpoint_test.go @@ -7,6 +7,7 @@ import ( "net/http" "net/http/httptest" "os" + "reflect" "testing" ) @@ -182,3 +183,39 @@ func TestHealthServiceNodes_PassingFilter(t *testing.T) { t.Fatalf("bad: %v", obj) } } + +func TestFilterNonPassing(t *testing.T) { + nodes := structs.CheckServiceNodes{ + structs.CheckServiceNode{ + Checks: structs.HealthChecks{ + &structs.HealthCheck{ + Status: structs.HealthCritical, + }, + &structs.HealthCheck{ + Status: structs.HealthCritical, + }, + }, + }, + structs.CheckServiceNode{ + Checks: structs.HealthChecks{ + &structs.HealthCheck{ + Status: structs.HealthCritical, + }, + &structs.HealthCheck{ + Status: structs.HealthCritical, + }, + }, + }, + structs.CheckServiceNode{ + Checks: structs.HealthChecks{ + &structs.HealthCheck{ + Status: structs.HealthPassing, + }, + }, + }, + } + out := filterNonPassing(nodes) + if len(out) != 1 && 
reflect.DeepEqual(out[0], nodes[2]) { + t.Fatalf("bad: %v", out) + } +} From 825dcd04f4cbfba360716d3dc5721456156e7568 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Thu, 10 Jul 2014 18:06:36 -0700 Subject: [PATCH 19/43] There is no `-statsite` command-line flag. --- website/source/docs/agent/telemetry.html.markdown | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/website/source/docs/agent/telemetry.html.markdown b/website/source/docs/agent/telemetry.html.markdown index 473e2832de..1035304cc9 100644 --- a/website/source/docs/agent/telemetry.html.markdown +++ b/website/source/docs/agent/telemetry.html.markdown @@ -18,9 +18,10 @@ information to the stderr of the agent. In general, the telemetry information is used for debugging or otherwise getting a better view into what Consul is doing. -Additionally, if the `-statsite` [option](/docs/agent/options.html) is provided, -then the telemetry information will be streamed to a [statsite](http://github.com/armon/statsite) -server where it can be aggregate and flushed to Graphite or any other metrics store. +Additionally, if the `statsite_addr` [configuration option](/docs/agent/options.html) +is provided, then the telemetry information will be streamed to a +[statsite](http://github.com/armon/statsite) server where it can be +aggregate and flushed to Graphite or any other metrics store. Below is an example output: From f3b9fcb49ab861d1b910046b4a6e1c9740fe1e82 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Thu, 10 Jul 2014 18:16:47 -0700 Subject: [PATCH 20/43] While we're at it, make statsite_addr work at all. 
--- command/agent/config.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/command/agent/config.go b/command/agent/config.go index f08d545b18..9a6a043f8a 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -500,6 +500,9 @@ func MergeConfig(a, b *Config) *Config { if b.SkipLeaveOnInt == true { result.SkipLeaveOnInt = true } + if b.StatsiteAddr != "" { + result.StatsiteAddr = b.StatsiteAddr + } if b.EnableDebug { result.EnableDebug = true } From 140a910fed903082cce9f9afebaa5a146bd48dec Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 15 Jul 2014 09:50:39 -0700 Subject: [PATCH 21/43] website: remove old docs on the -encrypt flag --- website/source/docs/agent/encryption.html.markdown | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/website/source/docs/agent/encryption.html.markdown b/website/source/docs/agent/encryption.html.markdown index 174a2aab4e..5671628b8b 100644 --- a/website/source/docs/agent/encryption.html.markdown +++ b/website/source/docs/agent/encryption.html.markdown @@ -28,7 +28,10 @@ With that key, you can enable encryption on the agent. You can verify encryption is enabled because the output will include "Encrypted: true". ``` -$ consul agent -data=/tmp/consul -encrypt=cg8StVXbQJ0gPvMd9o7yrg== +$ cat encrypt.json +{"encrypt": "cg8StVXbQJ0gPvMd9o7yrg=="} + +$ consul agent -data=/tmp/consul -config-file encrypt.json ==> Starting Consul agent... ==> Starting Consul agent RPC... ==> Consul agent running! 
From 6eb3e8ee30680259be315c50b9ba14938f255f1a Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Wed, 16 Jul 2014 15:11:45 -0700 Subject: [PATCH 22/43] agent: Fixing issue with excessive failed node filtering --- command/agent/dns.go | 2 ++ command/agent/dns_test.go | 42 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/command/agent/dns.go b/command/agent/dns.go index 697f314fcb..4a0bd14f0c 100644 --- a/command/agent/dns.go +++ b/command/agent/dns.go @@ -471,6 +471,7 @@ RPC: // health checks to prevent routing to unhealthy nodes func (d *DNSServer) filterServiceNodes(nodes structs.CheckServiceNodes) structs.CheckServiceNodes { n := len(nodes) +OUTER: for i := 0; i < n; i++ { node := nodes[i] for _, check := range node.Checks { @@ -480,6 +481,7 @@ func (d *DNSServer) filterServiceNodes(nodes structs.CheckServiceNodes) structs. nodes[i], nodes[n-1] = nodes[n-1], structs.CheckServiceNode{} n-- i-- + continue OUTER } } } diff --git a/command/agent/dns_test.go b/command/agent/dns_test.go index 3a2804a4c5..cf1ae791f7 100644 --- a/command/agent/dns_test.go +++ b/command/agent/dns_test.go @@ -651,6 +651,40 @@ func TestDNS_ServiceLookup_FilterCritical(t *testing.T) { t.Fatalf("err: %v", err) } + args3 := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "bar", + Address: "127.0.0.2", + Service: &structs.NodeService{ + Service: "db", + Tags: []string{"master"}, + Port: 12345, + }, + Check: &structs.HealthCheck{ + CheckID: "db", + Name: "db", + ServiceID: "db", + Status: structs.HealthCritical, + }, + } + if err := srv.agent.RPC("Catalog.Register", args3, &out); err != nil { + t.Fatalf("err: %v", err) + } + + args4 := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "baz", + Address: "127.0.0.3", + Service: &structs.NodeService{ + Service: "db", + Tags: []string{"master"}, + Port: 12345, + }, + } + if err := srv.agent.RPC("Catalog.Register", args4, &out); err != nil { + t.Fatalf("err: %v", err) + } + m := 
new(dns.Msg) m.SetQuestion("db.service.consul.", dns.TypeANY) @@ -662,9 +696,15 @@ func TestDNS_ServiceLookup_FilterCritical(t *testing.T) { } // Should get no answer since we are failing! - if len(in.Answer) != 0 { + if len(in.Answer) != 1 { t.Fatalf("Bad: %#v", in) } + + resp := in.Answer[0] + aRec := resp.(*dns.A) + if aRec.A.String() != "127.0.0.3" { + t.Fatalf("Bad: %#v", in.Answer[0]) + } } func TestDNS_ServiceLookup_Randomize(t *testing.T) { From be215e67171ea454207858f61a99342e8aa37760 Mon Sep 17 00:00:00 2001 From: Jack Pearkes Date: Thu, 17 Jul 2014 09:48:38 -0400 Subject: [PATCH 23/43] ui: scroll extra items on nodes/services fixes #232 --- ui/styles/_lists.scss | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ui/styles/_lists.scss b/ui/styles/_lists.scss index 8bded01e9a..7e3cfee6f2 100644 --- a/ui/styles/_lists.scss +++ b/ui/styles/_lists.scss @@ -16,6 +16,8 @@ padding-left: 0px; color: $gray; font-size: 13px; + overflow: scroll; + height: 30px; } .list-group-item-heading { From 663bd102e7e4a95b26a29d74911107d096d69c61 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Thu, 17 Jul 2014 10:57:15 -0700 Subject: [PATCH 24/43] agent: Provide better DNS setup error messages --- command/agent/dns.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/command/agent/dns.go b/command/agent/dns.go index 4a0bd14f0c..522376e150 100644 --- a/command/agent/dns.go +++ b/command/agent/dns.go @@ -84,14 +84,14 @@ func NewDNSServer(agent *Agent, config *DNSConfig, logOutput io.Writer, domain, go func() { err := server.ListenAndServe() srv.logger.Printf("[ERR] dns: error starting udp server: %v", err) - errCh <- err + errCh <- fmt.Errorf("dns udp setup failed: %v", err) }() errChTCP := make(chan error, 1) go func() { err := serverTCP.ListenAndServe() srv.logger.Printf("[ERR] dns: error starting tcp server: %v", err) - errChTCP <- err + errChTCP <- fmt.Errorf("dns tcp setup failed: %v", err) }() // Check the server is running, do a test lookup @@ 
-107,7 +107,7 @@ func NewDNSServer(agent *Agent, config *DNSConfig, logOutput io.Writer, domain, c := new(dns.Client) in, _, err := c.Exchange(m, bind) if err != nil { - checkCh <- err + checkCh <- fmt.Errorf("dns test query failed: %v", err) return } From 6c2ee5c8427312cd0cb0325083548bf6de0cca46 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Mon, 21 Jul 2014 10:58:37 -0400 Subject: [PATCH 25/43] Cutting v0.3.1 --- CHANGELOG.md | 11 ++++++++++- bench/bench-aws.json | 4 ++-- bench/bench.json | 4 ++-- demo/vagrant-cluster/Vagrantfile | 2 +- version.go | 2 +- 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b6834119c..c2954ce23b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,14 +1,23 @@ -## 0.3.1 (Unreleased) +## 0.3.1 (July 21, 2014) + +FEATURES: + + * Improved bootstrapping process, thanks to @robxu9 BUG FIXES: * Fixed issue with service re-registration [GH-216] * Fixed handling of `-rejoin` flag + * Restored 0.2 TLS behavior, thanks to @nelhage [GH-233] + * Fix the statsite flags, thanks to @nelhage [GH-243] + * Fixed filters on criticial / non-passing checks [GH-241] IMPROVEMENTS: + * UI Improvements * Improved handling of Serf snapshot data * Increase reliability of failure detector + * More useful logging messages ## 0.3.0 (June 13, 2014) diff --git a/bench/bench-aws.json b/bench/bench-aws.json index 2adb91ccda..0cd03a14e0 100644 --- a/bench/bench-aws.json +++ b/bench/bench-aws.json @@ -53,8 +53,8 @@ "sudo mkdir /etc/consul.d", "sudo apt-get update", "sudo apt-get install unzip make", - "wget https://dl.bintray.com/mitchellh/consul/0.3.0_linux_amd64.zip", - "unzip 0.3.0_linux_amd64.zip", + "wget https://dl.bintray.com/mitchellh/consul/0.3.1_linux_amd64.zip", + "unzip 0.3.1_linux_amd64.zip", "sudo mv consul /usr/local/bin/consul", "chmod +x /usr/local/bin/consul" ] diff --git a/bench/bench.json b/bench/bench.json index c8b2910558..67603072f7 100644 --- a/bench/bench.json +++ b/bench/bench.json @@ -47,8 +47,8 
@@ "mkdir /etc/consul.d", "apt-get update", "apt-get install unzip make", - "wget https://dl.bintray.com/mitchellh/consul/0.3.0_linux_amd64.zip", - "unzip 0.3.0_linux_amd64.zip", + "wget https://dl.bintray.com/mitchellh/consul/0.3.1_linux_amd64.zip", + "unzip 0.3.1_linux_amd64.zip", "mv consul /usr/local/bin/consul", "chmod +x /usr/local/bin/consul" ] diff --git a/demo/vagrant-cluster/Vagrantfile b/demo/vagrant-cluster/Vagrantfile index 5678c544ff..427f06e2e4 100644 --- a/demo/vagrant-cluster/Vagrantfile +++ b/demo/vagrant-cluster/Vagrantfile @@ -7,7 +7,7 @@ sudo apt-get install -y unzip echo Fetching Consul... cd /tmp/ -wget https://dl.bintray.com/mitchellh/consul/0.3.0_linux_amd64.zip -O consul.zip +wget https://dl.bintray.com/mitchellh/consul/0.3.1_linux_amd64.zip -O consul.zip echo Installing Consul... unzip consul.zip diff --git a/version.go b/version.go index 38fc1914e9..a38a77a3f8 100644 --- a/version.go +++ b/version.go @@ -4,7 +4,7 @@ package main var GitCommit string // The main version number that is being run at the moment. -const Version = "0.3.0" +const Version = "0.3.1" // A pre-release marker for the version. If this is "" (empty string) // then it means that it is a final release. Otherwise, this is a pre-release From 183118cf1cd78f432e779a6dd95c12d72d5a5c51 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Mon, 21 Jul 2014 11:26:53 -0400 Subject: [PATCH 26/43] website: Update dummy --- website/dummy.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/dummy.txt b/website/dummy.txt index 6cd25eecc3..c04dff067b 100644 --- a/website/dummy.txt +++ b/website/dummy.txt @@ -1,4 +1,4 @@ This file doesn't do anything, but we periodically update the number below just to force being able to deploy the website again. 
-1 +2 From ac745b1008523e9ed03927c1e2dfc1578dfeeaab Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Mon, 21 Jul 2014 11:34:10 -0400 Subject: [PATCH 27/43] Adding the deps file for 0.3.1 --- deps/v0-3-1.json | 72 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 deps/v0-3-1.json diff --git a/deps/v0-3-1.json b/deps/v0-3-1.json new file mode 100644 index 0000000000..14e4551e1e --- /dev/null +++ b/deps/v0-3-1.json @@ -0,0 +1,72 @@ +{ + "ImportPath": "github.com/hashicorp/consul", + "GoVersion": "go1.3", + "Deps": [ + { + "ImportPath": "github.com/armon/circbuf", + "Rev": "f092b4f207b6e5cce0569056fba9e1a2735cb6cf" + }, + { + "ImportPath": "github.com/armon/go-metrics", + "Rev": "02567bbc4f518a43853d262b651a3c8257c3f141" + }, + { + "ImportPath": "github.com/armon/gomdb", + "Rev": "a8e036c4dabe7437014ecf9dbc03c6f6f0766ef8" + }, + { + "ImportPath": "github.com/hashicorp/go-syslog", + "Rev": "ac3963b72ac367e48b1e68a831e62b93fb69091c" + }, + { + "ImportPath": "github.com/hashicorp/logutils", + "Rev": "8e0820fe7ac5eb2b01626b1d99df47c5449eb2d8" + }, + { + "ImportPath": "github.com/hashicorp/memberlist", + "Rev": "e6a282556f0e8f15e9a53dcb0d14912a3c2fb141" + }, + { + "ImportPath": "github.com/hashicorp/raft", + "Rev": "35f5fa082f5a064595d84715b0cf8821f002e9ac" + }, + { + "ImportPath": "github.com/hashicorp/raft-mdb", + "Rev": "9076b4b956c1c4c8a47117608b612bda2cb5f481" + }, + { + "ImportPath": "github.com/hashicorp/serf/serf", + "Comment": "v0.6.3-1-g7f260e7", + "Rev": "7f260e70a89739bd38c1f0bf3b74c0e1c1ee617f" + }, + { + "ImportPath": "github.com/hashicorp/yamux", + "Rev": "35417c7dfab4085d7c921b33e4d5ea6cf9ceef65" + }, + { + "ImportPath": "github.com/inconshreveable/muxado", + "Rev": "f693c7e88ba316d1a0ae3e205e22a01aa3ec2848" + }, + { + "ImportPath": "github.com/miekg/dns", + "Rev": "9af5c1f8a8a71bc5c8539d16cdc40b4a47ee7024" + }, + { + "ImportPath": "github.com/mitchellh/cli", + "Rev": 
"eaf0e415fc517a431dca53c7b2e7559d42238ebe" + }, + { + "ImportPath": "github.com/mitchellh/mapstructure", + "Rev": "6fb2c832bcac61d01212ab1d172f7a14a8585b07" + }, + { + "ImportPath": "github.com/ryanuber/columnize", + "Comment": "v2.0.1", + "Rev": "785d943a7b6886e0bb2f139a60487b823dd8d9de" + }, + { + "ImportPath": "github.com/ugorji/go/codec", + "Rev": "71c2886f5a673a35f909803f38ece5810165097b" + } + ] +} From ce9de56469de83b312fb726c34eab7cf5be04236 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Jul 2014 09:36:58 -0400 Subject: [PATCH 28/43] consul: Defer serf handler until initialized. Fixes #254. --- consul/server.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/consul/server.go b/consul/server.go index 1f3825da26..af61dc94c7 100644 --- a/consul/server.go +++ b/consul/server.go @@ -187,10 +187,6 @@ func NewServer(config *Config) (*Server, error) { return nil, fmt.Errorf("Failed to start Raft: %v", err) } - // Start the Serf listeners to prevent a deadlock - go s.lanEventHandler() - go s.wanEventHandler() - // Initialize the lan Serf s.serfLAN, err = s.setupSerf(config.SerfLANConfig, s.eventChLAN, serfLANSnapshot, false) @@ -198,6 +194,7 @@ func NewServer(config *Config) (*Server, error) { s.Shutdown() return nil, fmt.Errorf("Failed to start lan serf: %v", err) } + go s.lanEventHandler() // Initialize the wan Serf s.serfWAN, err = s.setupSerf(config.SerfWANConfig, @@ -206,6 +203,7 @@ func NewServer(config *Config) (*Server, error) { s.Shutdown() return nil, fmt.Errorf("Failed to start wan serf: %v", err) } + go s.wanEventHandler() // Start listening for RPC requests go s.listen() From 615792c1b8597198ac996234e98a16bb46bc8b7b Mon Sep 17 00:00:00 2001 From: "Gavin M. 
Roy" Date: Tue, 22 Jul 2014 12:42:24 -0400 Subject: [PATCH 29/43] Add the logo favicon to the webui --- ui/index.html | 1 + ui/static/favicon.png | Bin 0 -> 3657 bytes 2 files changed, 1 insertion(+) create mode 100644 ui/static/favicon.png diff --git a/ui/index.html b/ui/index.html index b8f35f0115..3902b8b21a 100644 --- a/ui/index.html +++ b/ui/index.html @@ -7,6 +7,7 @@ Consul + diff --git a/ui/static/favicon.png b/ui/static/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..ba6fbc34e0b333c89b684d1aa84bd7993ed1a129 GIT binary patch literal 3657 zcmbVPXH-*JyQcRNib_!kp{pb$B$SX)LMWm4CLn=C2#`V&x-^9WhcfDbR8c7c&L}WQ zM?eHc1W^Q~N(T`^0V#^e7iPw}_s6X7u655k=j^@T^FGhpf1FG9w&ubDG6HODY{EDT ztRriM9X@VPB3G!~kT4P_Wf!}|vlnLs~c0EvPIeQaq50Z9Zj$PHnmX+tw1 z29hkI=|rb!TWA01V1GRV$j|_YibS#q$V4U{7)cJHFp!aG(4V|W*8Wfo0RjJ1VFsf? ze>vrDV-GZ;(uqI>SWClS6AA@tYlES1gtj*HG!Ui5dn_S0#oS$5U8G>-XRAJroqzCU_?=v_(%;3L-7X# zmdNm@lW0s5l>$6u#QRahm}n5o)4xI>(`;=1A*L|?P82I;kVrfY0tIVA$mGMg{?ukL z9f|+r#=mMaoTF$&h$E3f4Ws+B>Tyo-2bh(+|8D3|k>w52j!t3~1s{T?`iGH;6ebRf z2C=??2_ym%ZUV(>n`mK;wP8?gT_{vnQ&$TM$7<x#Qiy4n{OgHi?R6@Wu*3JP6G^NVj|F!|oU2t0?n#v?)qFYI)gjqpiIUg~X;Bi;oISn@7^_nYmMWn= zPkrbnsvzsyhCv)T)>Gx!*4vL0&?zrIUPkw?4$AXT<76XU!^v0<$+|dIZ9r>(Z=Cm#B4qQbpQjqDMuIh^pw?SeaN|M}UEi zLsqx-E!mD`gL-U;rZwBNctu)_Q9h;@Af#FPfx1Vr?|)|ra9gbHTUFDkyykF^e!YW> zE)BEK1R zMW=|#RzmNc`##@Adt7w_)ZXZxKrme#-bxHC)QaBtE|@%Uz5V9N`(lO@T<1mIxxEw@ z40#W>Iqjl{>-CeqaYgpN`p#k2=JhfwV=lnQ>U-Aw@u~7N5l&9ry0g2K8Si?Imc%Cs zEy@SadK5F(EC@JRP|_NNE!cowc(Dks@EkLd`J~RVzkWKe`fj|#Db=gT6*>Vow(pB_ z)Ru{zU0Yd&1RF~h&3klgF>Y|wCTyaQF+aK1Vegc6HA03+3BeK_chOc{b&g7vu1=aKMFZwM;Xo;R+iz6Vb%`J&~aXLK1cXGvz13{<- zMX;99n>0SddX6EPK5$Dn^&xzwMrtJ;tMW9}7;v`7CO9xfywb#Cqwi6SnhewulLMLq z7+#TsDn^_x_`ErLjtp1$ah*=O%RC*H98x|KceU|FYPhgg&FInnk-&X`U?#|-w) z7F07|e|xt6>gx^l_>A!oC zJ-OK(-fkIW=~abz}JNI2PF*z#nv&{$K5hICTdgInCrtMk=wFXOE0FEOC-W{ z0N@s!t6+Lvl`U~4<2=W^{vxqaAb zRjk0-wVys4sHV${PLRBt;pLU}-!IyJ)*dMFtW7vReLTNpcu4XS|9C-lKEJGNW{fIX 
z_R>!o=O0x&mf0B!I;;LBoPTz|ZSl=D&Uwt%o|h!YxTK5CM{IyTYO;;P>+<5kNm^%H zm0C)c?wo=KhCrW=drt#*UG7LYn;S^{c#EWmu4QUTgDlOxM_=&D%-k2Lr zVS2Q5$>~4TSCzDheJN2{)Y+OuPQOBVeSIa2t`#9v+xDu&(=U!{G0@!a-9^S+sBE+B zFHchL=Qg@gDk;}u#Bn>V0!-s^_sw&SRYjh;6n~bJ=hnDtB3!SmiN2FB z(o!@K7JI(7X3i{|xFL}D5O6OvH+3gDt9d@w#jMFRR=w~@t9)i0cm1DTo(WVE zQn#s5{{E)b5mncw0zHLl3Bz*eJM2Q{pbz(@MI>~pwarRC*?(y5YYH7KIl3RW5DYnv zc83ngJDawjI-ORfE_20P+C!ZT%kZk;=>EIwql~sa@n?NBrP)WN zUyl-6@%gV&<0z?qSLgc0UGpV^Tw_?TBvhpAb_H|OTLP())U9CUbE7>h_kCuzlV-m; zyRg3}IR^|5Pl%`!kbxSc11T+cNy6jUy#+UBm{(fqDO>J1zVy^`&sn>Xo{L3G zf{C(K9K*;pkI`-!roxq=ODk0!Q|OOb(O1LVEbHpocGy>5w&ExVKXO=84!_kx5!&i8 zd_0lb&}EDUiwU~Fd77oZ37g3hnx3#qV%ML)6JO?dJM@vCvaw@Ga=C^QsCcMT?xtVk zL=ICU{i9LcM6mFuigNVJw=ZaC>~Kn(&Dk)r;Y~gXIgZTfQ8DS`;afLTy01=h(%&?C z*7MFj$Ppu4lS6+ReSbf_L*4v?t>c&;W$td~+d?g;fzZO#k1_>aMdSJpmg(uCbOTT) zVCiRS`1;M1CyHMjnr4ov-i?*yqKwQzM(=->-vmsMjWip?j3-sY?E~rzg~Fac3-i$V zuostciP6L6cRIBBlr1Bm4!XU+7T#zj{<1^`kzBm)6DL=Q>gzdgsc4`jWsl9TYv?64 zb&kTaYxmV6O3w4DxoMQ~l(GA62wZYpuoQUzMXfzZqGMVA-Oef4Cbiry!`?rM!O&QG ztQe_!p#X3n7tXIjdv%%gS}G|WqJF!xIK(m55@qQ4lN>eZcd}=7#FJY`2)T-7t)_-f zN?pn#DP3Zdrg=11Lg|ZRfaQJP@iq-gkNQE!(J1Lf_)qkDTh!G;Zl zb`fOEtl!PgIW3E?U53Gr!M-r(XXWttGGBQ7Pc!?Hk6bhyHtZGO*EhQa->ylMf!u`@ zYHxRR1<2bWMYD&7se$2Lv)`Hm-Qz`SHfR8Q4(pmmy}?+je5&=Qf=?KF8ArQEw82U1&a<_*QD2(O zkG?mC^7ZoUNvKj>I^CRwb9G_QNiL!ibcFYTgfE+T@cXN%sb-bKpF5nXE%qM9_riYw D-Q7mK literal 0 HcmV?d00001 From 436cd144a5472421b2793425f6852670eef8cbc4 Mon Sep 17 00:00:00 2001 From: Jack Pearkes Date: Tue, 22 Jul 2014 13:31:44 -0400 Subject: [PATCH 30/43] ui: add tests param to readme --- ui/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ui/README.md b/ui/README.md index 068d286081..6e1fabb59a 100644 --- a/ui/README.md +++ b/ui/README.md @@ -44,6 +44,8 @@ An example of this command, from inside the `ui/` directory, would be: consul agent -bootstrap -server -data-dir /tmp/ -ui-dir . +Basic tests can be run by adding the `?test` query parameter to the +application. 
### Releasing From e5798c74d2d473ff8965556ab74c612619de3dc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Thu, 17 Jul 2014 08:37:25 +0200 Subject: [PATCH 31/43] Add helper for lowercase list of strings --- consul/util.go | 9 +++++++++ consul/util_test.go | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/consul/util.go b/consul/util.go index 00815ea10c..96ee5c3276 100644 --- a/consul/util.go +++ b/consul/util.go @@ -9,6 +9,7 @@ import ( "path/filepath" "runtime" "strconv" + "strings" "github.com/hashicorp/serf/serf" ) @@ -68,6 +69,14 @@ func strContains(l []string, s string) bool { return false } +func ToLowerList(l []string) []string { + var out []string + for _, value := range l { + out = append(out, strings.ToLower(value)) + } + return out +} + // ensurePath is used to make sure a path exists func ensurePath(path string, dir bool) error { if !dir { diff --git a/consul/util_test.go b/consul/util_test.go index 107146b521..91b7fd2f53 100644 --- a/consul/util_test.go +++ b/consul/util_test.go @@ -18,6 +18,15 @@ func TestStrContains(t *testing.T) { } } +func TestToLowerList(t *testing.T) { + l := []string{"ABC", "Abc", "abc"} + for _, value := range ToLowerList(l) { + if value != "abc" { + t.Fatalf("failed lowercasing") + } + } +} + func TestIsPrivateIP(t *testing.T) { if !isPrivateIP("192.168.1.1") { t.Fatalf("bad") From ee4de117417e54bc83e1857f6f991cbe21efe7b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Thu, 17 Jul 2014 08:38:01 +0200 Subject: [PATCH 32/43] Add case-insensitive flag to `MDBIndex` --- consul/mdb_table.go | 13 +++++++------ consul/state_store.go | 1 + 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/consul/mdb_table.go b/consul/mdb_table.go index c4c84b0dc9..53c85d7cf3 100644 --- a/consul/mdb_table.go +++ b/consul/mdb_table.go @@ -45,12 +45,13 @@ type MDBTables []*MDBTable // An Index is named, and uses a series of column values to // map to the row-id containing the table 
type MDBIndex struct { - AllowBlank bool // Can fields be blank - Unique bool // Controls if values are unique - Fields []string // Fields are used to build the index - IdxFunc IndexFunc // Can be used to provide custom indexing - Virtual bool // Virtual index does not exist, but can be used for queries - RealIndex string // Virtual indexes use a RealIndex for iteration + AllowBlank bool // Can fields be blank + Unique bool // Controls if values are unique + Fields []string // Fields are used to build the index + IdxFunc IndexFunc // Can be used to provide custom indexing + Virtual bool // Virtual index does not exist, but can be used for queries + RealIndex string // Virtual indexes use a RealIndex for iteration + CaseInsensitive bool // Controls if values are case-insensitive table *MDBTable name string diff --git a/consul/state_store.go b/consul/state_store.go index a2f139af1c..39d778850c 100644 --- a/consul/state_store.go +++ b/consul/state_store.go @@ -179,6 +179,7 @@ func (s *StateStore) initialize() error { "id": &MDBIndex{ Unique: true, Fields: []string{"Node"}, + CaseInsensitive: true, }, }, Decoder: func(buf []byte) interface{} { From e0ba9a48a7dc13602f05a8be6e9c42ed6238ba49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Thu, 17 Jul 2014 08:38:24 +0200 Subject: [PATCH 33/43] Test DNS case-insensitivity --- command/agent/dns_test.go | 73 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/command/agent/dns_test.go b/command/agent/dns_test.go index cf1ae791f7..7d664d74a0 100644 --- a/command/agent/dns_test.go +++ b/command/agent/dns_test.go @@ -136,6 +136,40 @@ func TestDNS_NodeLookup(t *testing.T) { } } +func TestDNS_CaseInsensitiveNodeLookup(t *testing.T) { + dir, srv := makeDNSServer(t) + defer os.RemoveAll(dir) + defer srv.agent.Shutdown() + + testutil.WaitForLeader(t, srv.agent.RPC, "dc1") + + // Register node + args := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "Foo", + Address: 
"127.0.0.1", + } + + var out struct{} + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + m := new(dns.Msg) + m.SetQuestion("fOO.node.DC1.consul.", dns.TypeANY) + + c := new(dns.Client) + addr, _ := srv.agent.config.ClientListener(srv.agent.config.Ports.DNS) + in, _, err := c.Exchange(m, addr.String()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if len(in.Answer) != 1 { + t.Fatalf("empty lookup: %#v", in) + } +} + func TestDNS_NodeLookup_PeriodName(t *testing.T) { dir, srv := makeDNSServer(t) defer os.RemoveAll(dir) @@ -336,6 +370,45 @@ func TestDNS_ServiceLookup(t *testing.T) { } } +func TestDNS_CaseInsensitiveServiceLookup(t *testing.T) { + dir, srv := makeDNSServer(t) + defer os.RemoveAll(dir) + defer srv.agent.Shutdown() + + testutil.WaitForLeader(t, srv.agent.RPC, "dc1") + + // Register node + args := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Service: &structs.NodeService{ + Service: "Db", + Tags: []string{"Master"}, + Port: 12345, + }, + } + + var out struct{} + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + m := new(dns.Msg) + m.SetQuestion("mASTER.dB.service.consul.", dns.TypeSRV) + + c := new(dns.Client) + addr, _ := srv.agent.config.ClientListener(srv.agent.config.Ports.DNS) + in, _, err := c.Exchange(m, addr.String()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if len(in.Answer) != 1 { + t.Fatalf("empty lookup: %#v", in) + } +} + func TestDNS_ServiceLookup_TagPeriod(t *testing.T) { dir, srv := makeDNSServer(t) defer os.RemoveAll(dir) From e4de2210784a5ba5952507a2d3e10b3fc52ba671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:28:54 +0200 Subject: [PATCH 34/43] Always lowercase incoming DNS query --- command/agent/dns.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/agent/dns.go b/command/agent/dns.go index 
522376e150..b83243720a 100644 --- a/command/agent/dns.go +++ b/command/agent/dns.go @@ -248,7 +248,7 @@ func (d *DNSServer) dispatch(network string, req, resp *dns.Msg) { datacenter := d.agent.config.Datacenter // Get the QName without the domain suffix - qName := dns.Fqdn(req.Question[0].Name) + qName := strings.ToLower(dns.Fqdn(req.Question[0].Name)) qName = strings.TrimSuffix(qName, d.domain) // Split into the label parts From 0d71b97f8ef1ed1aecf547a9550cf9c69db26f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:29:28 +0200 Subject: [PATCH 35/43] Remove DC case-insensitive check from node test --- command/agent/dns_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/agent/dns_test.go b/command/agent/dns_test.go index 7d664d74a0..7d42266143 100644 --- a/command/agent/dns_test.go +++ b/command/agent/dns_test.go @@ -156,7 +156,7 @@ func TestDNS_CaseInsensitiveNodeLookup(t *testing.T) { } m := new(dns.Msg) - m.SetQuestion("fOO.node.DC1.consul.", dns.TypeANY) + m.SetQuestion("fOO.node.dc1.consul.", dns.TypeANY) c := new(dns.Client) addr, _ := srv.agent.config.ClientListener(srv.agent.config.Ports.DNS) From 9359f899f5eea7029f9e9003202e4d1d7d1b6810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:30:12 +0200 Subject: [PATCH 36/43] Lowercase index key and lookup value if flag is set --- consul/mdb_table.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/consul/mdb_table.go b/consul/mdb_table.go index 53c85d7cf3..592bce849a 100644 --- a/consul/mdb_table.go +++ b/consul/mdb_table.go @@ -427,6 +427,10 @@ func (t *MDBTable) getIndex(index string, parts []string) (*MDBIndex, []byte, er return nil, nil, tooManyFields } + if idx.CaseInsensitive { + parts = ToLowerList(parts) + } + // Construct the key key := idx.keyFromParts(parts...) 
return idx, key, nil @@ -614,6 +618,9 @@ func (i *MDBIndex) keyFromObject(obj interface{}) ([]byte, error) { if !i.AllowBlank && val == "" { return nil, fmt.Errorf("Field '%s' must be set: %#v", field, obj) } + if i.CaseInsensitive { + val = strings.ToLower(val) + } parts = append(parts, val) } key := i.keyFromParts(parts...) From 9ad8b9ff19b5f85fe92f611059171b888c116b46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:33:27 +0200 Subject: [PATCH 37/43] Make service index case-insensitive --- consul/state_store.go | 1 + 1 file changed, 1 insertion(+) diff --git a/consul/state_store.go b/consul/state_store.go index 39d778850c..ed823eb2e1 100644 --- a/consul/state_store.go +++ b/consul/state_store.go @@ -201,6 +201,7 @@ func (s *StateStore) initialize() error { "service": &MDBIndex{ AllowBlank: true, Fields: []string{"ServiceName"}, + CaseInsensitive: true, }, }, Decoder: func(buf []byte) interface{} { From 37426f7410fabc476449126460231f3e91ecdf7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:33:47 +0200 Subject: [PATCH 38/43] Make service tag filter case-insensitive --- consul/state_store.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/consul/state_store.go b/consul/state_store.go index ed823eb2e1..043a871032 100644 --- a/consul/state_store.go +++ b/consul/state_store.go @@ -642,7 +642,8 @@ func serviceTagFilter(l []interface{}, tag string) []interface{} { n := len(l) for i := 0; i < n; i++ { srv := l[i].(*structs.ServiceNode) - if !strContains(srv.ServiceTags, tag) { + srv.ServiceTags = ToLowerList(srv.ServiceTags) + if !strContains(srv.ServiceTags, strings.ToLower(tag)) { l[i], l[n-1] = l[n-1], nil i-- n-- From 57d62eb492e0fbb3bc84c1803ea8d98239655e39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:34:03 +0200 Subject: [PATCH 39/43] Change order of fixtures --- consul/catalog_endpoint_test.go | 6 +++--- 1 
file changed, 3 insertions(+), 3 deletions(-) diff --git a/consul/catalog_endpoint_test.go b/consul/catalog_endpoint_test.go index f9721ca098..06066b7dbe 100644 --- a/consul/catalog_endpoint_test.go +++ b/consul/catalog_endpoint_test.go @@ -220,13 +220,13 @@ func TestCatalogListNodes(t *testing.T) { }) // Server node is auto added from Serf - if out.Nodes[0].Node != s1.config.NodeName { + if out.Nodes[1].Node != s1.config.NodeName { t.Fatalf("bad: %v", out) } - if out.Nodes[1].Node != "foo" { + if out.Nodes[0].Node != "foo" { t.Fatalf("bad: %v", out) } - if out.Nodes[1].Address != "127.0.0.1" { + if out.Nodes[0].Address != "127.0.0.1" { t.Fatalf("bad: %v", out) } } From 945e19e139c72230d51e571b978b3ac9200bf193 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 15:11:45 +0200 Subject: [PATCH 40/43] Don't override `ServiceTags` --- consul/state_store.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/consul/state_store.go b/consul/state_store.go index 043a871032..12e1c6c453 100644 --- a/consul/state_store.go +++ b/consul/state_store.go @@ -642,8 +642,7 @@ func serviceTagFilter(l []interface{}, tag string) []interface{} { n := len(l) for i := 0; i < n; i++ { srv := l[i].(*structs.ServiceNode) - srv.ServiceTags = ToLowerList(srv.ServiceTags) - if !strContains(srv.ServiceTags, strings.ToLower(tag)) { + if !strContains(ToLowerList(srv.ServiceTags), strings.ToLower(tag)) { l[i], l[n-1] = l[n-1], nil i-- n-- From dd9c59b6c44dc3baeecff20e771206704e7ba618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 23:39:13 +0200 Subject: [PATCH 41/43] Short doc note about DNS case-insensitivity --- website/source/docs/agent/dns.html.markdown | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/website/source/docs/agent/dns.html.markdown b/website/source/docs/agent/dns.html.markdown index 46303a60d6..938c989a6b 100644 --- a/website/source/docs/agent/dns.html.markdown +++ 
b/website/source/docs/agent/dns.html.markdown @@ -20,7 +20,8 @@ with no failing health checks. It's that simple! There are a number of [configuration options](/docs/agent/options.html) that are important for the DNS interface. They are `client_addr`, `ports.dns`, `recursor`, `domain`, and `dns_config`. By default Consul will listen on 127.0.0.1:8600 for DNS queries -in the "consul." domain, without support for DNS recursion. +in the "consul." domain, without support for DNS recursion. All queries are case-insensitive, a +name lookup for `PostgreSQL.node.dc1.consul` will find all nodes named `postgresql`, no matter of case. There are a few ways to use the DNS interface. One option is to use a custom DNS resolver library and point it at Consul. Another option is to set Consul From 674152a3da9aedf45b448e2c680ec285e81a0516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 23:45:03 +0200 Subject: [PATCH 42/43] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2954ce23b..97f87722ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## 0.3.2 (Unreleased) + +IMPROVEMENTS: + + * DNS case-insensitivity [GH-189] + ## 0.3.1 (July 21, 2014) FEATURES: From 6b52d410b32d0cac0fe208351b193b5191f26550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Thu, 24 Jul 2014 01:09:55 +0200 Subject: [PATCH 43/43] Run `go fmt` --- consul/state_store.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/consul/state_store.go b/consul/state_store.go index 12e1c6c453..f95b0554e0 100644 --- a/consul/state_store.go +++ b/consul/state_store.go @@ -177,8 +177,8 @@ func (s *StateStore) initialize() error { Name: dbNodes, Indexes: map[string]*MDBIndex{ "id": &MDBIndex{ - Unique: true, - Fields: []string{"Node"}, + Unique: true, + Fields: []string{"Node"}, CaseInsensitive: true, }, }, @@ -199,8 +199,8 @@ func (s *StateStore) 
initialize() error { Fields: []string{"Node", "ServiceID"}, }, "service": &MDBIndex{ - AllowBlank: true, - Fields: []string{"ServiceName"}, + AllowBlank: true, + Fields: []string{"ServiceName"}, CaseInsensitive: true, }, },