mirror of https://github.com/status-im/consul.git
Fix nil-pointer panics from proxycfg package. (#16277)
Prior to this PR, servers / agents would panic and crash if an ingress or api gateway were configured to use a discovery chain that both: 1. Referenced a peered service 2. Had a mesh gateway mode of local This could occur, because code for handling upstream watches was shared between both connect-proxy and the gateways. As a short-term fix, this PR ensures that the maps are always initialized for these gateway services. This PR also wraps the proxycfg execution and service registration calls with recover statements to ensure that future issues like this do not put the server into an unrecoverable state.
This commit is contained in:
parent
1d9ee50681
commit
6599a9be1d
|
@ -73,6 +73,7 @@ func (h *handlerAPIGateway) initialize(ctx context.Context) (ConfigSnapshot, err
|
|||
snap.APIGateway.WatchedDiscoveryChains = make(map[UpstreamID]context.CancelFunc)
|
||||
snap.APIGateway.WatchedGateways = make(map[UpstreamID]map[string]context.CancelFunc)
|
||||
snap.APIGateway.WatchedGatewayEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
|
||||
snap.APIGateway.WatchedLocalGWEndpoints = watch.NewMap[string, structs.CheckServiceNodes]()
|
||||
snap.APIGateway.WatchedUpstreams = make(map[UpstreamID]map[string]context.CancelFunc)
|
||||
snap.APIGateway.WatchedUpstreamEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
|
||||
|
||||
|
|
|
@ -67,6 +67,7 @@ func (s *handlerIngressGateway) initialize(ctx context.Context) (ConfigSnapshot,
|
|||
snap.IngressGateway.WatchedUpstreamEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
|
||||
snap.IngressGateway.WatchedGateways = make(map[UpstreamID]map[string]context.CancelFunc)
|
||||
snap.IngressGateway.WatchedGatewayEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
|
||||
snap.IngressGateway.WatchedLocalGWEndpoints = watch.NewMap[string, structs.CheckServiceNodes]()
|
||||
snap.IngressGateway.Listeners = make(map[IngressListenerKey]structs.IngressListener)
|
||||
snap.IngressGateway.UpstreamPeerTrustBundles = watch.NewMap[string, *pbpeering.PeeringTrustBundle]()
|
||||
snap.IngressGateway.PeerUpstreamEndpoints = watch.NewMap[UpstreamID, structs.CheckServiceNodes]()
|
||||
|
|
|
@ -2,6 +2,7 @@ package proxycfg
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"runtime/debug"
|
||||
"sync"
|
||||
|
||||
"github.com/hashicorp/go-hclog"
|
||||
|
@ -142,6 +143,20 @@ func (m *Manager) Register(id ProxyID, ns *structs.NodeService, source ProxySour
|
|||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
m.Logger.Error("unexpected panic during service manager registration",
|
||||
"node", id.NodeName,
|
||||
"service", id.ServiceID,
|
||||
"message", r,
|
||||
"stacktrace", string(debug.Stack()),
|
||||
)
|
||||
}
|
||||
}()
|
||||
return m.register(id, ns, source, token, overwrite)
|
||||
}
|
||||
|
||||
func (m *Manager) register(id ProxyID, ns *structs.NodeService, source ProxySource, token string, overwrite bool) error {
|
||||
state, ok := m.proxies[id]
|
||||
if ok {
|
||||
if state.source != source && !overwrite {
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"fmt"
|
||||
"net"
|
||||
"reflect"
|
||||
"runtime/debug"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
|
@ -298,6 +299,21 @@ func newConfigSnapshotFromServiceInstance(s serviceInstance, config stateConfig)
|
|||
}
|
||||
|
||||
func (s *state) run(ctx context.Context, snap *ConfigSnapshot) {
|
||||
// Add a recover here so than any panics do not make their way up
|
||||
// into the server / agent.
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
s.logger.Error("unexpected panic while running proxycfg",
|
||||
"node", s.serviceInstance.proxyID.NodeName,
|
||||
"service", s.serviceInstance.proxyID.ServiceID,
|
||||
"message", r,
|
||||
"stacktrace", string(debug.Stack()))
|
||||
}
|
||||
}()
|
||||
s.unsafeRun(ctx, snap)
|
||||
}
|
||||
|
||||
func (s *state) unsafeRun(ctx context.Context, snap *ConfigSnapshot) {
|
||||
// Close the channel we return from Watch when we stop so consumers can stop
|
||||
// watching and clean up their goroutines. It's important we do this here and
|
||||
// not in Close since this routine sends on this chan and so might panic if it
|
||||
|
|
Loading…
Reference in New Issue