From a155423f2948bec1bad0784058d3da2d7ee90bd2 Mon Sep 17 00:00:00 2001 From: "R.B. Boyer" Date: Fri, 23 Oct 2020 13:41:54 -0500 Subject: [PATCH] server: config entry replication now correctly uses namespaces in comparisons (#9024) Previously config entries sharing a kind & name but in different namespaces could occasionally cause "stuck states" in replication because the namespace fields were ignored during the differential comparison phase. Example: Two config entries written to the primary: kind=A,name=web,namespace=bar kind=A,name=web,namespace=foo Under the covers these both get saved to memdb, so they are sorted by all 3 components (kind,name,namespace) during natural iteration. This means that before the replication code does it's own incomplete sort, the underlying data IS sorted by namespace ascending (bar comes before foo). After one pass of replication the primary and secondary datacenters have the same set of config entries present. If "kind=A,name=web,namespace=bar" were to be deleted, then things get weird. Before replication the two sides look like: primary: [ kind=A,name=web,namespace=foo ] secondary: [ kind=A,name=web,namespace=bar kind=A,name=web,namespace=foo ] The differential comparison phase walks these two lists in sorted order and first compares "kind=A,name=web,namespace=foo" vs "kind=A,name=web,namespace=bar" and falsely determines they are the SAME and are thus cause an update of "kind=A,name=web,namespace=foo". Then it compares "" with "kind=A,name=web,namespace=foo" and falsely determines that the latter should be DELETED. During reconciliation the deletes are processed before updates, and so for a brief moment in the secondary "kind=A,name=web,namespace=foo" is erroneously deleted and then immediately restored. Unfortunately after this replication phase the final state is identical to the initial state, so when it loops around again (rate limited) it repeats the same set of operations indefinitely. --- .changelog/9024.txt | 3 + agent/consul/config_replication.go | 42 +++++++++---- agent/consul/config_replication_test.go | 80 +++++++++++++++++++++++++ agent/structs/config_entry.go | 6 ++ agent/structs/connect_proxy_config.go | 11 ++++ 5 files changed, 132 insertions(+), 10 deletions(-) create mode 100644 .changelog/9024.txt diff --git a/.changelog/9024.txt b/.changelog/9024.txt new file mode 100644 index 0000000000..f6475a6e03 --- /dev/null +++ b/.changelog/9024.txt @@ -0,0 +1,3 @@ +```release-note:security +Fix Consul Enterprise Namespace Config Entry Replication DoS. Previously an operator with service:write ACL permissions in a Consul Enterprise cluster could write a malicious config entry that caused infinite raft writes due to issues with the namespace replication logic. [CVE-2020-25201] (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-25201) +``` diff --git a/agent/consul/config_replication.go b/agent/consul/config_replication.go index 3562859b10..e3f58e88e6 100644 --- a/agent/consul/config_replication.go +++ b/agent/consul/config_replication.go @@ -11,12 +11,8 @@ import ( "github.com/hashicorp/go-hclog" ) -func cmpConfigLess(first structs.ConfigEntry, second structs.ConfigEntry) bool { - return first.GetKind() < second.GetKind() || (first.GetKind() == second.GetKind() && first.GetName() < second.GetName()) -} - func configSort(configs []structs.ConfigEntry) { - sort.Slice(configs, func(i, j int) bool { + sort.SliceStable(configs, func(i, j int) bool { return cmpConfigLess(configs[i], configs[j]) }) } @@ -25,12 +21,14 @@ func diffConfigEntries(local []structs.ConfigEntry, remote []structs.ConfigEntry configSort(local) configSort(remote) - var deletions []structs.ConfigEntry - var updates []structs.ConfigEntry - var localIdx int - var remoteIdx int + var ( + deletions []structs.ConfigEntry + updates []structs.ConfigEntry + localIdx int + remoteIdx int + ) for localIdx, remoteIdx = 0, 0; localIdx < len(local) && remoteIdx < len(remote); { - if local[localIdx].GetKind() == remote[remoteIdx].GetKind() && local[localIdx].GetName() == remote[remoteIdx].GetName() { + if configSameID(local[localIdx], remote[remoteIdx]) { // config is in both the local and remote state - need to check raft indices if remote[remoteIdx].GetRaftIndex().ModifyIndex > lastRemoteIndex { updates = append(updates, remote[remoteIdx]) @@ -64,6 +62,30 @@ func diffConfigEntries(local []structs.ConfigEntry, remote []structs.ConfigEntry return deletions, updates } +func cmpConfigLess(first structs.ConfigEntry, second structs.ConfigEntry) bool { + if first.GetKind() < second.GetKind() { + return true + } + if first.GetKind() > second.GetKind() { + return false + } + + if first.GetEnterpriseMeta().LessThan(second.GetEnterpriseMeta()) { + return true + } + if second.GetEnterpriseMeta().LessThan(first.GetEnterpriseMeta()) { + return false + } + + return first.GetName() < second.GetName() +} + +func configSameID(e1, e2 structs.ConfigEntry) bool { + return e1.GetKind() == e2.GetKind() && + e1.GetEnterpriseMeta().IsSame(e2.GetEnterpriseMeta()) && + e1.GetName() == e2.GetName() +} + func (s *Server) reconcileLocalConfig(ctx context.Context, configs []structs.ConfigEntry, op structs.ConfigEntryOp) (bool, error) { ticker := time.NewTicker(time.Second / time.Duration(s.config.ConfigReplicationApplyLimit)) defer ticker.Stop() diff --git a/agent/consul/config_replication_test.go b/agent/consul/config_replication_test.go index 3780b1ea27..72cedd0a1d 100644 --- a/agent/consul/config_replication_test.go +++ b/agent/consul/config_replication_test.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "testing" + "time" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/sdk/testutil/retry" @@ -11,6 +12,85 @@ import ( "github.com/stretchr/testify/require" ) +func TestReplication_ConfigSort(t *testing.T) { + newDefaults := func(name, protocol string) *structs.ServiceConfigEntry { + return &structs.ServiceConfigEntry{ + Kind: structs.ServiceDefaults, + Name: name, + Protocol: protocol, + } + } + newResolver := func(name string, timeout time.Duration) *structs.ServiceResolverConfigEntry { + return &structs.ServiceResolverConfigEntry{ + Kind: structs.ServiceResolver, + Name: name, + ConnectTimeout: timeout, + } + } + + type testcase struct { + configs []structs.ConfigEntry + expect []structs.ConfigEntry + } + + cases := map[string]testcase{ + "none": {}, + "one": { + configs: []structs.ConfigEntry{ + newDefaults("web", "grpc"), + }, + expect: []structs.ConfigEntry{ + newDefaults("web", "grpc"), + }, + }, + "just kinds": { + configs: []structs.ConfigEntry{ + newResolver("web", 33*time.Second), + newDefaults("web", "grpc"), + }, + expect: []structs.ConfigEntry{ + newDefaults("web", "grpc"), + newResolver("web", 33*time.Second), + }, + }, + "just names": { + configs: []structs.ConfigEntry{ + newDefaults("db", "grpc"), + newDefaults("api", "http2"), + }, + expect: []structs.ConfigEntry{ + newDefaults("api", "http2"), + newDefaults("db", "grpc"), + }, + }, + "all": { + configs: []structs.ConfigEntry{ + newResolver("web", 33*time.Second), + newDefaults("web", "grpc"), + newDefaults("db", "grpc"), + newDefaults("api", "http2"), + }, + expect: []structs.ConfigEntry{ + newDefaults("api", "http2"), + newDefaults("db", "grpc"), + newDefaults("web", "grpc"), + newResolver("web", 33*time.Second), + }, + }, + } + + for name, tc := range cases { + tc := tc + t.Run(name, func(t *testing.T) { + configSort(tc.configs) + require.Equal(t, tc.expect, tc.configs) + // and it should be stable + configSort(tc.configs) + require.Equal(t, tc.expect, tc.configs) + }) + } +} + func TestReplication_ConfigEntries(t *testing.T) { t.Parallel() dir1, s1 := testServerWithConfig(t, func(c *Config) { diff --git a/agent/structs/config_entry.go b/agent/structs/config_entry.go index b1ffd3e0d6..338a542549 100644 --- a/agent/structs/config_entry.go +++ b/agent/structs/config_entry.go @@ -71,6 +71,12 @@ type ServiceConfigEntry struct { RaftIndex } +func (e *ServiceConfigEntry) Clone() *ServiceConfigEntry { + e2 := *e + e2.Expose = e.Expose.Clone() + return &e2 +} + func (e *ServiceConfigEntry) GetKind() string { return ServiceDefaults } diff --git a/agent/structs/connect_proxy_config.go b/agent/structs/connect_proxy_config.go index bad538781c..40db900907 100644 --- a/agent/structs/connect_proxy_config.go +++ b/agent/structs/connect_proxy_config.go @@ -408,6 +408,17 @@ type ExposeConfig struct { Paths []ExposePath `json:",omitempty"` } +func (e ExposeConfig) Clone() ExposeConfig { + e2 := e + if len(e.Paths) > 0 { + e2.Paths = make([]ExposePath, 0, len(e.Paths)) + for _, p := range e.Paths { + e2.Paths = append(e2.Paths, p) + } + } + return e2 +} + type ExposePath struct { // ListenerPort defines the port of the proxy's listener for exposed paths. ListenerPort int `json:",omitempty" alias:"listener_port"`