From 7f58576042ce0cfb3a68c9e3bdcfb65de64b2b24 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Mon, 27 Mar 2017 00:15:42 -0700 Subject: [PATCH] Adds node ID integrity checking for cluster merges. --- consul/client.go | 6 +- consul/merge.go | 25 ++++++- consul/merge_test.go | 160 +++++++++++++++++++++++++++++++++++++++++++ consul/server.go | 6 +- 4 files changed, 194 insertions(+), 3 deletions(-) create mode 100644 consul/merge_test.go diff --git a/consul/client.go b/consul/client.go index 959fde26b6..78c671289c 100644 --- a/consul/client.go +++ b/consul/client.go @@ -156,7 +156,11 @@ func (c *Client) setupSerf(conf *serf.Config, ch chan serf.Event, path string) ( conf.SnapshotPath = filepath.Join(c.config.DataDir, path) conf.ProtocolVersion = protocolVersionMap[c.config.ProtocolVersion] conf.RejoinAfterLeave = c.config.RejoinAfterLeave - conf.Merge = &lanMergeDelegate{dc: c.config.Datacenter} + conf.Merge = &lanMergeDelegate{ + dc: c.config.Datacenter, + nodeID: c.config.NodeID, + nodeName: c.config.NodeName, + } if err := lib.EnsurePath(conf.SnapshotPath, false); err != nil { return nil, err } diff --git a/consul/merge.go b/consul/merge.go index defa7ef108..335d0280a4 100644 --- a/consul/merge.go +++ b/consul/merge.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/hashicorp/consul/consul/agent" + "github.com/hashicorp/consul/types" "github.com/hashicorp/serf/serf" ) @@ -11,11 +12,33 @@ import ( // ring. We check that the peers are in the same datacenter and abort the // merge if there is a mis-match. type lanMergeDelegate struct { - dc string + dc string + nodeID types.NodeID + nodeName string } func (md *lanMergeDelegate) NotifyMerge(members []*serf.Member) error { + nodeMap := make(map[types.NodeID]string) for _, m := range members { + if rawID, ok := m.Tags["id"]; ok && rawID != "" { + nodeID := types.NodeID(rawID) + + // See if there's another node that conflicts with us. + if (nodeID == md.nodeID) && (m.Name != md.nodeName) { + return fmt.Errorf("Member '%s' has conflicting node ID '%s' with this agent's ID", + m.Name, nodeID) + } + + // See if there are any two nodes that conflict with each + // other. This lets us only do joins into a hygienic + // cluster now that node IDs are critical for operation. + if other, ok := nodeMap[nodeID]; ok { + return fmt.Errorf("Member '%s' has conflicting node ID '%s' with member '%s'", + m.Name, nodeID, other) + } + nodeMap[nodeID] = m.Name + } + ok, dc := isConsulNode(*m) if ok { if dc != md.dc { diff --git a/consul/merge_test.go b/consul/merge_test.go new file mode 100644 index 0000000000..034a99bc3c --- /dev/null +++ b/consul/merge_test.go @@ -0,0 +1,160 @@ +package consul + +import ( + "strings" + "testing" + + "github.com/hashicorp/consul/types" + "github.com/hashicorp/serf/serf" +) + +func makeNode(dc, name, id string, server bool) *serf.Member { + var role string + if server { + role = "consul" + } else { + role = "node" + } + + return &serf.Member{ + Name: name, + Tags: map[string]string{ + "role": role, + "dc": dc, + "id": id, + "port": "8300", + "build": "0.7.5", + "vsn": "2", + "vsn_max": "3", + "vsn_min": "2", + }, + } +} + +func TestMerge_LAN(t *testing.T) { + cases := []struct { + members []*serf.Member + expect string + }{ + // Client in the wrong datacenter. + { + members: []*serf.Member{ + makeNode("dc2", + "node1", + "96430788-246f-4379-94ce-257f7429e340", + false), + }, + expect: "wrong datacenter", + }, + // Server in the wrong datacenter. + { + members: []*serf.Member{ + makeNode("dc2", + "node1", + "96430788-246f-4379-94ce-257f7429e340", + true), + }, + expect: "wrong datacenter", + }, + // Node ID conflict with delegate's ID. + { + members: []*serf.Member{ + makeNode("dc1", + "node1", + "ee954a2f-80de-4b34-8780-97b942a50a99", + true), + }, + expect: "with this agent's ID", + }, + // Cluster with existing conflicting node IDs. + { + members: []*serf.Member{ + makeNode("dc1", + "node1", + "6185913b-98d7-4441-bd8f-f7f7d854a4af", + true), + makeNode("dc1", + "node2", + "6185913b-98d7-4441-bd8f-f7f7d854a4af", + true), + }, + expect: "with member", + }, + // Good cluster. + { + members: []*serf.Member{ + makeNode("dc1", + "node1", + "6185913b-98d7-4441-bd8f-f7f7d854a4af", + true), + makeNode("dc1", + "node2", + "cda916bc-a357-4a19-b886-59419fcee50c", + true), + }, + expect: "", + }, + } + + delegate := &lanMergeDelegate{ + dc: "dc1", + nodeID: types.NodeID("ee954a2f-80de-4b34-8780-97b942a50a99"), + nodeName: "node0", + } + for i, c := range cases { + if err := delegate.NotifyMerge(c.members); c.expect == "" { + if err != nil { + t.Fatalf("case %d: err: %v", i+1, err) + } + } else { + if err == nil || !strings.Contains(err.Error(), c.expect) { + t.Fatalf("case %d: err: %v", i+1, err) + } + } + } +} + +func TestMerge_WAN(t *testing.T) { + cases := []struct { + members []*serf.Member + expect string + }{ + // Not a server + { + members: []*serf.Member{ + makeNode("dc2", + "node1", + "96430788-246f-4379-94ce-257f7429e340", + false), + }, + expect: "not a server", + }, + // Good cluster. + { + members: []*serf.Member{ + makeNode("dc2", + "node1", + "6185913b-98d7-4441-bd8f-f7f7d854a4af", + true), + makeNode("dc3", + "node2", + "cda916bc-a357-4a19-b886-59419fcee50c", + true), + }, + expect: "", + }, + } + + delegate := &wanMergeDelegate{} + for i, c := range cases { + if err := delegate.NotifyMerge(c.members); c.expect == "" { + if err != nil { + t.Fatalf("case %d: err: %v", i+1, err) + } + } else { + if err == nil || !strings.Contains(err.Error(), c.expect) { + t.Fatalf("case %d: err: %v", i+1, err) + } + } + } +} diff --git a/consul/server.go b/consul/server.go index 5822ab9659..2e9fbce0a7 100644 --- a/consul/server.go +++ b/consul/server.go @@ -396,7 +396,11 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w if wan { conf.Merge = &wanMergeDelegate{} } else { - conf.Merge = &lanMergeDelegate{dc: s.config.Datacenter} + conf.Merge = &lanMergeDelegate{ + dc: s.config.Datacenter, + nodeID: s.config.NodeID, + nodeName: s.config.NodeName, + } } // Until Consul supports this fully, we disable automatic resolution.