server: config entry replication now correctly uses namespaces in comparisons (#9024)

Previously config entries sharing a kind & name but in different
namespaces could occasionally cause "stuck states" in replication
because the namespace fields were ignored during the differential
comparison phase.

Example:

Two config entries written to the primary:

    kind=A,name=web,namespace=bar
    kind=A,name=web,namespace=foo

Under the covers these both get saved to memdb, so they are sorted by
all 3 components (kind,name,namespace) during natural iteration. This
means that before the replication code does it's own incomplete sort,
the underlying data IS sorted by namespace ascending (bar comes before
foo).

After one pass of replication the primary and secondary datacenters have
the same set of config entries present. If
"kind=A,name=web,namespace=bar" were to be deleted, then things get
weird. Before replication the two sides look like:

primary: [
    kind=A,name=web,namespace=foo
]
secondary: [
    kind=A,name=web,namespace=bar
    kind=A,name=web,namespace=foo
]

The differential comparison phase walks these two lists in sorted order
and first compares "kind=A,name=web,namespace=foo" vs
"kind=A,name=web,namespace=bar" and falsely determines they are the SAME
and are thus cause an update of "kind=A,name=web,namespace=foo". Then it
compares "<nothing>" with "kind=A,name=web,namespace=foo" and falsely
determines that the latter should be DELETED.

During reconciliation the deletes are processed before updates, and so
for a brief moment in the secondary "kind=A,name=web,namespace=foo" is
erroneously deleted and then immediately restored.

Unfortunately after this replication phase the final state is identical
to the initial state, so when it loops around again (rate limited) it
repeats the same set of operations indefinitely.
This commit is contained in:
R.B. Boyer 2020-10-23 13:41:54 -05:00 committed by GitHub
parent 8bd1a2cd16
commit 58387fef0a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 132 additions and 10 deletions

3
.changelog/9024.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:security
Fix Consul Enterprise Namespace Config Entry Replication DoS. Previously an operator with service:write ACL permissions in a Consul Enterprise cluster could write a malicious config entry that caused infinite raft writes due to issues with the namespace replication logic. [CVE-2020-25201] (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-25201)
```

View File

@ -11,12 +11,8 @@ import (
"github.com/hashicorp/go-hclog"
)
func cmpConfigLess(first structs.ConfigEntry, second structs.ConfigEntry) bool {
return first.GetKind() < second.GetKind() || (first.GetKind() == second.GetKind() && first.GetName() < second.GetName())
}
func configSort(configs []structs.ConfigEntry) {
sort.Slice(configs, func(i, j int) bool {
sort.SliceStable(configs, func(i, j int) bool {
return cmpConfigLess(configs[i], configs[j])
})
}
@ -25,12 +21,14 @@ func diffConfigEntries(local []structs.ConfigEntry, remote []structs.ConfigEntry
configSort(local)
configSort(remote)
var deletions []structs.ConfigEntry
var updates []structs.ConfigEntry
var localIdx int
var remoteIdx int
var (
deletions []structs.ConfigEntry
updates []structs.ConfigEntry
localIdx int
remoteIdx int
)
for localIdx, remoteIdx = 0, 0; localIdx < len(local) && remoteIdx < len(remote); {
if local[localIdx].GetKind() == remote[remoteIdx].GetKind() && local[localIdx].GetName() == remote[remoteIdx].GetName() {
if configSameID(local[localIdx], remote[remoteIdx]) {
// config is in both the local and remote state - need to check raft indices
if remote[remoteIdx].GetRaftIndex().ModifyIndex > lastRemoteIndex {
updates = append(updates, remote[remoteIdx])
@ -64,6 +62,30 @@ func diffConfigEntries(local []structs.ConfigEntry, remote []structs.ConfigEntry
return deletions, updates
}
func cmpConfigLess(first structs.ConfigEntry, second structs.ConfigEntry) bool {
if first.GetKind() < second.GetKind() {
return true
}
if first.GetKind() > second.GetKind() {
return false
}
if first.GetEnterpriseMeta().LessThan(second.GetEnterpriseMeta()) {
return true
}
if second.GetEnterpriseMeta().LessThan(first.GetEnterpriseMeta()) {
return false
}
return first.GetName() < second.GetName()
}
func configSameID(e1, e2 structs.ConfigEntry) bool {
return e1.GetKind() == e2.GetKind() &&
e1.GetEnterpriseMeta().IsSame(e2.GetEnterpriseMeta()) &&
e1.GetName() == e2.GetName()
}
func (s *Server) reconcileLocalConfig(ctx context.Context, configs []structs.ConfigEntry, op structs.ConfigEntryOp) (bool, error) {
ticker := time.NewTicker(time.Second / time.Duration(s.config.ConfigReplicationApplyLimit))
defer ticker.Stop()

View File

@ -4,6 +4,7 @@ import (
"fmt"
"os"
"testing"
"time"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/sdk/testutil/retry"
@ -11,6 +12,85 @@ import (
"github.com/stretchr/testify/require"
)
func TestReplication_ConfigSort(t *testing.T) {
newDefaults := func(name, protocol string) *structs.ServiceConfigEntry {
return &structs.ServiceConfigEntry{
Kind: structs.ServiceDefaults,
Name: name,
Protocol: protocol,
}
}
newResolver := func(name string, timeout time.Duration) *structs.ServiceResolverConfigEntry {
return &structs.ServiceResolverConfigEntry{
Kind: structs.ServiceResolver,
Name: name,
ConnectTimeout: timeout,
}
}
type testcase struct {
configs []structs.ConfigEntry
expect []structs.ConfigEntry
}
cases := map[string]testcase{
"none": {},
"one": {
configs: []structs.ConfigEntry{
newDefaults("web", "grpc"),
},
expect: []structs.ConfigEntry{
newDefaults("web", "grpc"),
},
},
"just kinds": {
configs: []structs.ConfigEntry{
newResolver("web", 33*time.Second),
newDefaults("web", "grpc"),
},
expect: []structs.ConfigEntry{
newDefaults("web", "grpc"),
newResolver("web", 33*time.Second),
},
},
"just names": {
configs: []structs.ConfigEntry{
newDefaults("db", "grpc"),
newDefaults("api", "http2"),
},
expect: []structs.ConfigEntry{
newDefaults("api", "http2"),
newDefaults("db", "grpc"),
},
},
"all": {
configs: []structs.ConfigEntry{
newResolver("web", 33*time.Second),
newDefaults("web", "grpc"),
newDefaults("db", "grpc"),
newDefaults("api", "http2"),
},
expect: []structs.ConfigEntry{
newDefaults("api", "http2"),
newDefaults("db", "grpc"),
newDefaults("web", "grpc"),
newResolver("web", 33*time.Second),
},
},
}
for name, tc := range cases {
tc := tc
t.Run(name, func(t *testing.T) {
configSort(tc.configs)
require.Equal(t, tc.expect, tc.configs)
// and it should be stable
configSort(tc.configs)
require.Equal(t, tc.expect, tc.configs)
})
}
}
func TestReplication_ConfigEntries(t *testing.T) {
t.Parallel()
dir1, s1 := testServerWithConfig(t, func(c *Config) {

View File

@ -97,6 +97,12 @@ type ServiceConfigEntry struct {
RaftIndex
}
func (e *ServiceConfigEntry) Clone() *ServiceConfigEntry {
e2 := *e
e2.Expose = e.Expose.Clone()
return &e2
}
func (e *ServiceConfigEntry) GetKind() string {
return ServiceDefaults
}

View File

@ -418,6 +418,17 @@ type ExposeConfig struct {
Paths []ExposePath `json:",omitempty"`
}
func (e ExposeConfig) Clone() ExposeConfig {
e2 := e
if len(e.Paths) > 0 {
e2.Paths = make([]ExposePath, 0, len(e.Paths))
for _, p := range e.Paths {
e2.Paths = append(e2.Paths, p)
}
}
return e2
}
type ExposePath struct {
// ListenerPort defines the port of the proxy's listener for exposed paths.
ListenerPort int `json:",omitempty" alias:"listener_port"`