Update topology mapping Refs on all proxy instance deletions (#9589)

* Insert new upstream/downstream mapping to persist new Refs

* Avoid upserting mapping copy if it's a no-op

* Add test with panic repro

* Avoid deleting up/downstreams from inside memdb iterator

* Avoid deleting gateway mappings from inside memdb iterator

* Add CHANGELOG entry

* Tweak changelog entry

Co-authored-by: Paul Banks <banks@banksco.de>
This commit is contained in:
Freddy 2021-01-20 08:17:26 -07:00 committed by hashicorp-ci
parent 7f14ce7a7e
commit f2cfbde1b0
3 changed files with 181 additions and 19 deletions

5
.changelog/9589.txt Normal file
View File

@ -0,0 +1,5 @@
```release-note:bug
server: Fixes a server panic introduced in 1.9.0 where Connect service mesh is
being used. Node de-registration could panic if it hosted services with
multiple upstreams.
```

View File

@ -2784,21 +2784,28 @@ func cleanupGatewayWildcards(tx WriteTxn, idx uint64, svc *structs.ServiceNode)
if err != nil {
return fmt.Errorf("failed gateway lookup for %q: %s", svc.ServiceName, err)
}
mappings := make([]*structs.GatewayService, 0)
for mapping := gateways.Next(); mapping != nil; mapping = gateways.Next() {
if gs, ok := mapping.(*structs.GatewayService); ok && gs != nil {
// Only delete if association was created by a wildcard specifier.
// Otherwise the service was specified in the config entry, and the association should be maintained
// for when the service is re-registered
if gs.FromWildcard {
if err := tx.Delete(gatewayServicesTableName, gs); err != nil {
return fmt.Errorf("failed to truncate gateway services table: %v", err)
}
if err := indexUpdateMaxTxn(tx, idx, gatewayServicesTableName); err != nil {
return fmt.Errorf("failed updating gateway-services index: %v", err)
}
if err := deleteGatewayServiceTopologyMapping(tx, idx, gs); err != nil {
return fmt.Errorf("failed to reconcile mesh topology for gateway: %v", err)
}
mappings = append(mappings, gs)
}
}
// Do the updates in a separate loop so we don't trash the iterator.
for _, m := range mappings {
// Only delete if association was created by a wildcard specifier.
// Otherwise the service was specified in the config entry, and the association should be maintained
// for when the service is re-registered
if m.FromWildcard {
if err := tx.Delete(gatewayServicesTableName, m); err != nil {
return fmt.Errorf("failed to truncate gateway services table: %v", err)
}
if err := indexUpdateMaxTxn(tx, idx, gatewayServicesTableName); err != nil {
return fmt.Errorf("failed updating gateway-services index: %v", err)
}
if err := deleteGatewayServiceTopologyMapping(tx, idx, m); err != nil {
return fmt.Errorf("failed to reconcile mesh topology for gateway: %v", err)
}
}
}
@ -3272,9 +3279,15 @@ func cleanupMeshTopology(tx WriteTxn, idx uint64, service *structs.ServiceNode)
if err != nil {
return fmt.Errorf("%q lookup failed: %v", topologyTableName, err)
}
mappings := make([]*structs.UpstreamDownstream, 0)
for raw := iter.Next(); raw != nil; raw = iter.Next() {
entry := raw.(*structs.UpstreamDownstream)
rawCopy, err := copystructure.Copy(entry)
mappings = append(mappings, raw.(*structs.UpstreamDownstream))
}
// Do the updates in a separate loop so we don't trash the iterator.
for _, m := range mappings {
rawCopy, err := copystructure.Copy(m)
if err != nil {
return fmt.Errorf("failed to copy existing topology mapping: %v", err)
}
@ -3282,15 +3295,25 @@ func cleanupMeshTopology(tx WriteTxn, idx uint64, service *structs.ServiceNode)
if !ok {
return fmt.Errorf("unexpected topology type %T", rawCopy)
}
delete(copy.Refs, uid)
// Bail early if there's no reference to the proxy ID we're deleting
if _, ok := copy.Refs[uid]; !ok {
continue
}
delete(copy.Refs, uid)
if len(copy.Refs) == 0 {
if err := tx.Delete(topologyTableName, entry); err != nil {
if err := tx.Delete(topologyTableName, m); err != nil {
return fmt.Errorf("failed to truncate %s table: %v", topologyTableName, err)
}
if err := indexUpdateMaxTxn(tx, idx, topologyTableName); err != nil {
return fmt.Errorf("failed updating %s index: %v", topologyTableName, err)
}
continue
}
if err := tx.Insert(topologyTableName, copy); err != nil {
return fmt.Errorf("failed inserting %s mapping: %s", topologyTableName, err)
}
}
return nil

View File

@ -6691,7 +6691,7 @@ func TestCatalog_upstreamsFromRegistration_Watches(t *testing.T) {
ws = memdb.NewWatchSet()
tx = s.db.ReadTxn()
idx, _, err = upstreamsFromRegistrationTxn(tx, ws, web)
idx, names, err = upstreamsFromRegistrationTxn(tx, ws, web)
require.NoError(t, err)
@ -6700,7 +6700,72 @@ func TestCatalog_upstreamsFromRegistration_Watches(t *testing.T) {
idx: 5,
}
require.Equal(t, exp.idx, idx)
require.Empty(t, exp.names)
require.Equal(t, exp.names, names)
}
func TestCatalog_topologyCleanupPanic(t *testing.T) {
s := testStateStore(t)
require.NoError(t, s.EnsureNode(0, &structs.Node{
ID: "c73b8fdf-4ef8-4e43-9aa2-59e85cc6a70c",
Node: "foo",
}))
defaultMeta := structs.DefaultEnterpriseMeta()
web := structs.NewServiceName("web", defaultMeta)
ws := memdb.NewWatchSet()
tx := s.db.ReadTxn()
idx, names, err := upstreamsFromRegistrationTxn(tx, ws, web)
require.NoError(t, err)
assert.Zero(t, idx)
assert.Len(t, names, 0)
svc := structs.NodeService{
Kind: structs.ServiceKindConnectProxy,
ID: "web-proxy-1",
Service: "web-proxy",
Address: "127.0.0.2",
Port: 443,
Proxy: structs.ConnectProxyConfig{
DestinationServiceName: "web",
Upstreams: structs.Upstreams{
structs.Upstream{
DestinationName: "db",
},
},
},
EnterpriseMeta: *defaultMeta,
}
require.NoError(t, s.EnsureService(1, "foo", &svc))
assert.True(t, watchFired(ws))
svc = structs.NodeService{
Kind: structs.ServiceKindConnectProxy,
ID: "web-proxy-2",
Service: "web-proxy",
Address: "127.0.0.2",
Port: 443,
Proxy: structs.ConnectProxyConfig{
DestinationServiceName: "web",
Upstreams: structs.Upstreams{
structs.Upstream{
DestinationName: "db",
},
structs.Upstream{
DestinationName: "cache",
},
},
},
EnterpriseMeta: *defaultMeta,
}
require.NoError(t, s.EnsureService(2, "foo", &svc))
assert.True(t, watchFired(ws))
// Now delete the node Foo, and this would panic because of the deletion within an iterator
require.NoError(t, s.DeleteNode(3, "foo"))
assert.True(t, watchFired(ws))
}
func TestCatalog_upstreamsFromRegistration_Ingress(t *testing.T) {
@ -6915,6 +6980,75 @@ func TestCatalog_upstreamsFromRegistration_Ingress(t *testing.T) {
require.Len(t, names, 0)
}
func TestCatalog_cleanupGatewayWildcards_panic(t *testing.T) {
s := testStateStore(t)
require.NoError(t, s.EnsureNode(0, &structs.Node{
ID: "c73b8fdf-4ef8-4e43-9aa2-59e85cc6a70c",
Node: "foo",
}))
require.NoError(t, s.EnsureConfigEntry(1, &structs.ProxyConfigEntry{
Kind: structs.ProxyDefaults,
Name: structs.ProxyConfigGlobal,
Config: map[string]interface{}{
"protocol": "http",
},
}, nil))
defaultMeta := structs.DefaultEnterpriseMeta()
// Register two different gateways that target services via wildcard
require.NoError(t, s.EnsureConfigEntry(2, &structs.TerminatingGatewayConfigEntry{
Kind: "terminating-gateway",
Name: "my-gateway-1-terminating",
Services: []structs.LinkedService{
{
Name: "*",
EnterpriseMeta: *defaultMeta,
},
},
}, nil))
require.NoError(t, s.EnsureConfigEntry(3, &structs.IngressGatewayConfigEntry{
Kind: "ingress-gateway",
Name: "my-gateway-2-ingress",
Listeners: []structs.IngressListener{
{
Port: 1111,
Protocol: "http",
Services: []structs.IngressService{
{
Name: "*",
EnterpriseMeta: *defaultMeta,
},
},
},
},
}, nil))
// Register two services that share a prefix, both will be covered by gateway wildcards above
api := structs.NodeService{
ID: "api",
Service: "api",
Address: "127.0.0.2",
Port: 443,
EnterpriseMeta: *defaultMeta,
}
require.NoError(t, s.EnsureService(4, "foo", &api))
api2 := structs.NodeService{
ID: "api-2",
Service: "api-2",
Address: "127.0.0.2",
Port: 443,
EnterpriseMeta: *defaultMeta,
}
require.NoError(t, s.EnsureService(5, "foo", &api2))
// Now delete the node "foo", and this would panic because of the deletion within an iterator
require.NoError(t, s.DeleteNode(6, "foo"))
}
func TestCatalog_DownstreamsForService(t *testing.T) {
defaultMeta := structs.DefaultEnterpriseMeta()