From 2b14a9b59ae225d99a5308362cb32c72f0e2ea11 Mon Sep 17 00:00:00 2001 From: Giulio Micheloni Date: Sat, 7 Aug 2021 13:21:12 +0100 Subject: [PATCH 1/7] grpc Server: turn panic into error through middleware --- agent/grpc/handler.go | 11 ++++++++++- agent/xds/server.go | 8 ++++++++ go.mod | 3 ++- go.sum | 7 +++++++ 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/agent/grpc/handler.go b/agent/grpc/handler.go index 53705b4dde..e21a4b768b 100644 --- a/agent/grpc/handler.go +++ b/agent/grpc/handler.go @@ -10,6 +10,9 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/keepalive" + + middleware "github.com/grpc-ecosystem/go-grpc-middleware/v2" + "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/recovery" ) // NewHandler returns a gRPC server that accepts connections from Handle(conn). @@ -20,8 +23,14 @@ func NewHandler(addr net.Addr, register func(server *grpc.Server)) *Handler { // We don't need to pass tls.Config to the server since it's multiplexed // behind the RPC listener, which already has TLS configured. srv := grpc.NewServer( + middleware.WithUnaryServerChain( + recovery.UnaryServerInterceptor(), + ), + middleware.WithStreamServerChain( + recovery.StreamServerInterceptor(), + (&activeStreamCounter{metrics: metrics}).Intercept, + ), grpc.StatsHandler(newStatsHandler(metrics)), - grpc.StreamInterceptor((&activeStreamCounter{metrics: metrics}).Intercept), grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ MinTime: 15 * time.Second, }), diff --git a/agent/xds/server.go b/agent/xds/server.go index 8e60371162..19ff44abaf 100644 --- a/agent/xds/server.go +++ b/agent/xds/server.go @@ -13,6 +13,8 @@ import ( "github.com/armon/go-metrics" "github.com/armon/go-metrics/prometheus" + middleware "github.com/grpc-ecosystem/go-grpc-middleware/v2" + "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/recovery" "github.com/hashicorp/go-hclog" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -547,6 +549,12 @@ func tokenFromContext(ctx context.Context) string { func (s *Server) GRPCServer(tlsConfigurator *tlsutil.Configurator) (*grpc.Server, error) { opts := []grpc.ServerOption{ grpc.MaxConcurrentStreams(2048), + middleware.WithUnaryServerChain( + recovery.UnaryServerInterceptor(), + ), + middleware.WithStreamServerChain( + recovery.StreamServerInterceptor(), + ), } if tlsConfigurator != nil { if tlsConfigurator.Cert() != nil { diff --git a/go.mod b/go.mod index eb10c2ad20..c515bbf034 100644 --- a/go.mod +++ b/go.mod @@ -29,6 +29,7 @@ require ( github.com/google/gofuzz v1.2.0 github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22 github.com/google/tcpproxy v0.0.0-20180808230851-dfa16c61dad2 + github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2 github.com/hashicorp/consul/api v1.8.0 github.com/hashicorp/consul/sdk v0.7.0 github.com/hashicorp/go-bexpr v0.1.2 @@ -72,7 +73,7 @@ require ( github.com/mitchellh/reflectwalk v1.0.1 github.com/patrickmn/go-cache v2.1.0+incompatible github.com/pierrec/lz4 v2.5.2+incompatible // indirect - github.com/pkg/errors v0.8.1 + github.com/pkg/errors v0.9.1 github.com/pquerna/cachecontrol v0.0.0-20180517163645-1555304b9b35 // indirect github.com/prometheus/client_golang v1.4.0 github.com/rboyer/safeio v0.2.1 diff --git a/go.sum b/go.sum index 672adbcf53..a886c10f4a 100644 --- a/go.sum +++ b/go.sum @@ -211,7 +211,11 @@ github.com/gophercloud/gophercloud v0.1.0 h1:P/nh25+rzXouhytV2pUHBb65fnds26Ghl8/ github.com/gophercloud/gophercloud v0.1.0/go.mod h1:vxM41WHh5uqHVBMZHzuwNOHh8XEoIEcSTewFxm1c5g8= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.0 h1:Iju5GlWwrvL6UBg4zJJt3btmonfrMlCDdsejg4CZE7c= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2 h1:1aeRCnE2CkKYqyzBu0+B2lgTcZPc3ea2lGpijeHbI1c= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2/go.mod h1:GhphxcdlaRyAuBSvo6rV71BvQcvB/vuX8ugCyybuS2k= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= @@ -405,6 +409,7 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/packethost/packngo v0.1.1-0.20180711074735-b9cb5096f54c h1:vwpFWvAO8DeIZfFeqASzZfsxuWPno9ncAebBEP0N3uE= github.com/packethost/packngo v0.1.1-0.20180711074735-b9cb5096f54c/go.mod h1:otzZQXgoO96RTzDB/Hycg0qZcXZsWJGJRSXbmEIJ+4M= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= @@ -420,6 +425,8 @@ github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= From 4b0eaa4bff987b4300287f35ac51a4897dd29fc3 Mon Sep 17 00:00:00 2001 From: Giulio Micheloni Date: Sun, 22 Aug 2021 19:06:26 +0100 Subject: [PATCH 2/7] grpc, xds: recovery middleware to return and log error in case of panic 1) xds and grpc servers: 1.1) to use recovery middleware with callback that prints stack trace to log 1.2) callback turn the panic into a core.Internal error 2) added unit test for grpc server --- agent/consul/server.go | 2 +- agent/grpc/client_test.go | 49 ++++++++++++++++++++++++--- agent/grpc/handler.go | 30 ++++++++++++++-- agent/grpc/server_test.go | 35 +++++++++++++++++-- agent/grpc/stats_test.go | 3 +- agent/rpc/subscribe/subscribe_test.go | 2 +- agent/xds/server.go | 25 ++++++++++++-- 7 files changed, 131 insertions(+), 15 deletions(-) diff --git a/agent/consul/server.go b/agent/consul/server.go index e5e4ecb371..a7a6517677 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -640,7 +640,7 @@ func newGRPCHandlerFromConfig(deps Deps, config *Config, s *Server) connHandler &subscribeBackend{srv: s, connPool: deps.GRPCConnPool}, deps.Logger.Named("grpc-api.subscription"))) } - return agentgrpc.NewHandler(config.RPCAddr, register) + return agentgrpc.NewHandler(deps.Logger, config.RPCAddr, register) } func (s *Server) connectCARootsMonitor(ctx context.Context) { diff --git a/agent/grpc/client_test.go b/agent/grpc/client_test.go index 49922a3098..a831bc8ba3 100644 --- a/agent/grpc/client_test.go +++ b/agent/grpc/client_test.go @@ -1,6 +1,7 @@ package grpc import ( + "bytes" "context" "fmt" "net" @@ -11,6 +12,8 @@ import ( "github.com/hashicorp/go-hclog" "github.com/stretchr/testify/require" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" "github.com/hashicorp/consul/agent/grpc/internal/testservice" "github.com/hashicorp/consul/agent/grpc/resolver" @@ -54,7 +57,7 @@ func TestNewDialer_IntegrationWithTLSEnabledHandler(t *testing.T) { res := resolver.NewServerResolverBuilder(newConfig(t)) registerWithGRPC(t, res) - srv := newTestServer(t, "server-1", "dc1") + srv := newSimpleTestServer(t, "server-1", "dc1") tlsConf, err := tlsutil.NewConfigurator(tlsutil.Config{ VerifyIncoming: true, VerifyOutgoing: true, @@ -91,7 +94,7 @@ func TestClientConnPool_IntegrationWithGRPCResolver_Failover(t *testing.T) { for i := 0; i < count; i++ { name := fmt.Sprintf("server-%d", i) - srv := newTestServer(t, name, "dc1") + srv := newSimpleTestServer(t, name, "dc1") res.AddServer(srv.Metadata()) t.Cleanup(srv.shutdown) } @@ -128,7 +131,7 @@ func TestClientConnPool_IntegrationWithGRPCResolver_Rebalance(t *testing.T) { for i := 0; i < count; i++ { name := fmt.Sprintf("server-%d", i) - srv := newTestServer(t, name, "dc1") + srv := newSimpleTestServer(t, name, "dc1") res.AddServer(srv.Metadata()) t.Cleanup(srv.shutdown) } @@ -177,7 +180,7 @@ func TestClientConnPool_IntegrationWithGRPCResolver_MultiDC(t *testing.T) { for _, dc := range dcs { name := "server-0-" + dc - srv := newTestServer(t, name, dc) + srv := newSimpleTestServer(t, name, dc) res.AddServer(srv.Metadata()) t.Cleanup(srv.shutdown) } @@ -202,3 +205,41 @@ func registerWithGRPC(t *testing.T, b *resolver.ServerResolverBuilder) { resolver.Deregister(b.Authority()) }) } + +func TestRecoverMiddleware(t *testing.T) { + // Prepare a logger with output to a buffer + // so we can check what it writes. + var buf bytes.Buffer + + logger := hclog.New(&hclog.LoggerOptions{ + Output: &buf, + }) + + res := resolver.NewServerResolverBuilder(newConfig(t)) + registerWithGRPC(t, res) + + srv := newPanicTestServer(t, logger, "server-1", "dc1") + res.AddServer(srv.Metadata()) + t.Cleanup(srv.shutdown) + + pool := NewClientConnPool(res, nil, useTLSForDcAlwaysTrue) + + conn, err := pool.ClientConn("dc1") + require.NoError(t, err) + client := testservice.NewSimpleClient(conn) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + t.Cleanup(cancel) + + resp, err := client.Something(ctx, &testservice.Req{}) + expectedErr := status.Errorf(codes.Internal, "grpc: panic serving request: panic from Something") + require.Equal(t, expectedErr, err) + require.Nil(t, resp) + + // Read the log + strLog := buf.String() + // Checking the entire stack trace is not possible, let's + // make sure that it contains a couple of expected strings. + require.Contains(t, strLog, `[ERROR] panic serving grpc request: panic="panic from Something`) + require.Contains(t, strLog, `github.com/hashicorp/consul/agent/grpc.(*simplePanic).Something`) +} diff --git a/agent/grpc/handler.go b/agent/grpc/handler.go index e21a4b768b..3a48679b00 100644 --- a/agent/grpc/handler.go +++ b/agent/grpc/handler.go @@ -4,30 +4,39 @@ Package grpc provides a Handler and client for agent gRPC connections. package grpc import ( + "context" "fmt" "net" "time" "google.golang.org/grpc" + "google.golang.org/grpc/codes" "google.golang.org/grpc/keepalive" + "google.golang.org/grpc/status" middleware "github.com/grpc-ecosystem/go-grpc-middleware/v2" "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/recovery" + "github.com/hashicorp/go-hclog" ) // NewHandler returns a gRPC server that accepts connections from Handle(conn). // The register function will be called with the grpc.Server to register // gRPC services with the server. -func NewHandler(addr net.Addr, register func(server *grpc.Server)) *Handler { +func NewHandler(logger Logger, addr net.Addr, register func(server *grpc.Server)) *Handler { + recoveryOpts := []recovery.Option{ + recovery.WithRecoveryHandlerContext(newPanicHandler(logger)), + } metrics := defaultMetrics() // We don't need to pass tls.Config to the server since it's multiplexed // behind the RPC listener, which already has TLS configured. srv := grpc.NewServer( middleware.WithUnaryServerChain( - recovery.UnaryServerInterceptor(), + // Add middlware interceptors to recover in case of panics. + recovery.UnaryServerInterceptor(recoveryOpts...), ), middleware.WithStreamServerChain( - recovery.StreamServerInterceptor(), + // Add middlware interceptors to recover in case of panics. + recovery.StreamServerInterceptor(recoveryOpts...), (&activeStreamCounter{metrics: metrics}).Intercept, ), grpc.StatsHandler(newStatsHandler(metrics)), @@ -41,6 +50,21 @@ func NewHandler(addr net.Addr, register func(server *grpc.Server)) *Handler { return &Handler{srv: srv, listener: lis} } +// newPanicHandler returns a recovery.RecoveryHandlerFuncContext closure function +// to handle panic in GRPC server's handlers. +func newPanicHandler(logger Logger) recovery.RecoveryHandlerFuncContext { + return func(ctx context.Context, p interface{}) (err error) { + // Log the panic and the stack trace of the Goroutine that caused the panic. + stacktrace := hclog.Stacktrace() + logger.Error("panic serving grpc request", + "panic", p, + "stack", stacktrace, + ) + + return status.Errorf(codes.Internal, "grpc: panic serving request: %v", p) + } +} + // Handler implements a handler for the rpc server listener, and the // agent.Component interface for managing the lifecycle of the grpc.Server. type Handler struct { diff --git a/agent/grpc/server_test.go b/agent/grpc/server_test.go index 442b617d50..d6efa826de 100644 --- a/agent/grpc/server_test.go +++ b/agent/grpc/server_test.go @@ -18,6 +18,7 @@ import ( "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/pool" "github.com/hashicorp/consul/tlsutil" + "github.com/hashicorp/go-hclog" ) type testServer struct { @@ -37,11 +38,22 @@ func (s testServer) Metadata() *metadata.Server { } } -func newTestServer(t *testing.T, name string, dc string) testServer { - addr := &net.IPAddr{IP: net.ParseIP("127.0.0.1")} - handler := NewHandler(addr, func(server *grpc.Server) { +func newSimpleTestServer(t *testing.T, name, dc string) testServer { + return newTestServer(t, hclog.Default(), name, dc, func(server *grpc.Server) { testservice.RegisterSimpleServer(server, &simple{name: name, dc: dc}) }) +} + +// newPanicTestServer sets up a simple server with handlers that panic. +func newPanicTestServer(t *testing.T, logger hclog.Logger, name, dc string) testServer { + return newTestServer(t, logger, name, dc, func(server *grpc.Server) { + testservice.RegisterSimpleServer(server, &simplePanic{name: name, dc: dc}) + }) +} + +func newTestServer(t *testing.T, logger hclog.Logger, name, dc string, register func(server *grpc.Server)) testServer { + addr := &net.IPAddr{IP: net.ParseIP("127.0.0.1")} + handler := NewHandler(logger, addr, register) lis, err := net.Listen("tcp", "127.0.0.1:0") require.NoError(t, err) @@ -101,6 +113,23 @@ func (s *simple) Something(_ context.Context, _ *testservice.Req) (*testservice. return &testservice.Resp{ServerName: s.name, Datacenter: s.dc}, nil } +type simplePanic struct { + name, dc string +} + +func (s *simplePanic) Flow(_ *testservice.Req, flow testservice.Simple_FlowServer) error { + for flow.Context().Err() == nil { + time.Sleep(time.Millisecond) + panic("panic from Flow") + } + return nil +} + +func (s *simplePanic) Something(_ context.Context, _ *testservice.Req) (*testservice.Resp, error) { + time.Sleep(time.Millisecond) + panic("panic from Something") +} + // fakeRPCListener mimics agent/consul.Server.listen to handle the RPCType byte. // In the future we should be able to refactor Server and extract this RPC // handling logic so that we don't need to use a fake. diff --git a/agent/grpc/stats_test.go b/agent/grpc/stats_test.go index 475bbf6dfb..079de34086 100644 --- a/agent/grpc/stats_test.go +++ b/agent/grpc/stats_test.go @@ -15,6 +15,7 @@ import ( "google.golang.org/grpc" "github.com/hashicorp/consul/agent/grpc/internal/testservice" + "github.com/hashicorp/go-hclog" ) func noopRegister(*grpc.Server) {} @@ -23,7 +24,7 @@ func TestHandler_EmitsStats(t *testing.T) { sink, reset := patchGlobalMetrics(t) addr := &net.IPAddr{IP: net.ParseIP("127.0.0.1")} - handler := NewHandler(addr, noopRegister) + handler := NewHandler(hclog.Default(), addr, noopRegister) reset() testservice.RegisterSimpleServer(handler.srv, &simple{}) diff --git a/agent/rpc/subscribe/subscribe_test.go b/agent/rpc/subscribe/subscribe_test.go index d2c13716dc..7ec636ec86 100644 --- a/agent/rpc/subscribe/subscribe_test.go +++ b/agent/rpc/subscribe/subscribe_test.go @@ -317,7 +317,7 @@ var _ Backend = (*testBackend)(nil) func runTestServer(t *testing.T, server *Server) net.Addr { addr := &net.IPAddr{IP: net.ParseIP("127.0.0.1")} var grpcServer *gogrpc.Server - handler := grpc.NewHandler(addr, func(srv *gogrpc.Server) { + handler := grpc.NewHandler(hclog.New(nil), addr, func(srv *gogrpc.Server) { grpcServer = srv pbsubscribe.RegisterStateChangeSubscriptionServer(srv, server) }) diff --git a/agent/xds/server.go b/agent/xds/server.go index 19ff44abaf..011cdb6536 100644 --- a/agent/xds/server.go +++ b/agent/xds/server.go @@ -545,15 +545,36 @@ func tokenFromContext(ctx context.Context) string { return "" } +// newPanicHandler returns a recovery.RecoveryHandlerFuncContext closure function +// to handle panic in GRPC server's handlers. +func newPanicHandler(logger hclog.Logger) recovery.RecoveryHandlerFuncContext { + return func(ctx context.Context, p interface{}) (err error) { + // Log the panic and the stack trace of the Goroutine that caused the panic. + stacktrace := hclog.Stacktrace() + logger.Error("panic serving grpc request", + "panic", p, + "stack", stacktrace, + ) + + return status.Errorf(codes.Internal, "grpc: panic serving request: %v", p) + } +} + // GRPCServer returns a server instance that can handle xDS requests. func (s *Server) GRPCServer(tlsConfigurator *tlsutil.Configurator) (*grpc.Server, error) { + recoveryOpts := []recovery.Option{ + recovery.WithRecoveryHandlerContext(newPanicHandler(s.Logger)), + } + opts := []grpc.ServerOption{ grpc.MaxConcurrentStreams(2048), middleware.WithUnaryServerChain( - recovery.UnaryServerInterceptor(), + // Add middlware interceptors to recover in case of panics. + recovery.UnaryServerInterceptor(recoveryOpts...), ), middleware.WithStreamServerChain( - recovery.StreamServerInterceptor(), + // Add middlware interceptors to recover in case of panics. + recovery.StreamServerInterceptor(recoveryOpts...), ), } if tlsConfigurator != nil { From 11805576134eec26578ebfcfe679111c35cdb2ae Mon Sep 17 00:00:00 2001 From: Giulio Micheloni Date: Sun, 22 Aug 2021 19:21:42 +0100 Subject: [PATCH 3/7] Added changelog for grpc and xds servers panic recovery. --- .changelog/10895.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/10895.txt diff --git a/.changelog/10895.txt b/.changelog/10895.txt new file mode 100644 index 0000000000..b490800d04 --- /dev/null +++ b/.changelog/10895.txt @@ -0,0 +1,3 @@ +```release-note:improvement +grpc, xds: improved reliability of grpc and xds servers by adding recovery-middleware to return and log error in case of panic. +``` From 7fa01105ccfbfd2d993e665703b13e905585b41d Mon Sep 17 00:00:00 2001 From: Giulio Micheloni Date: Sun, 22 Aug 2021 19:35:08 +0100 Subject: [PATCH 4/7] Fix merge conflicts --- agent/grpc/client_test.go | 2 +- agent/xds/server.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/agent/grpc/client_test.go b/agent/grpc/client_test.go index 1bec1f580f..284daae9e3 100644 --- a/agent/grpc/client_test.go +++ b/agent/grpc/client_test.go @@ -126,7 +126,7 @@ func TestClientConnPool_ForwardToLeader_Failover(t *testing.T) { var servers []testServer for i := 0; i < count; i++ { name := fmt.Sprintf("server-%d", i) - srv := newTestServer(t, name, "dc1") + srv := newSimpleTestServer(t, name, "dc1") res.AddServer(srv.Metadata()) servers = append(servers, srv) t.Cleanup(srv.shutdown) diff --git a/agent/xds/server.go b/agent/xds/server.go index c3ddbda426..c0be9c560d 100644 --- a/agent/xds/server.go +++ b/agent/xds/server.go @@ -563,7 +563,7 @@ func newPanicHandler(logger hclog.Logger) recovery.RecoveryHandlerFuncContext { // NewGRPCServer creates a grpc.Server, registers the Server, and then returns // the grpc.Server. func NewGRPCServer(s *Server, tlsConfigurator *tlsutil.Configurator) *grpc.Server { - recoveryOpts := []recovery.Option{ + recoveryOpts := []recovery.Option{ recovery.WithRecoveryHandlerContext(newPanicHandler(s.Logger)), } From cbf437efdb12b80277c89acd0b5bcf1ef5fc42fd Mon Sep 17 00:00:00 2001 From: Giulio Micheloni Date: Sun, 22 Aug 2021 19:50:10 +0100 Subject: [PATCH 5/7] Fix go.sum with go mod tidy --- go.sum | 1 - 1 file changed, 1 deletion(-) diff --git a/go.sum b/go.sum index c195111574..bbae367037 100644 --- a/go.sum +++ b/go.sum @@ -214,7 +214,6 @@ github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoA github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0 h1:Iju5GlWwrvL6UBg4zJJt3btmonfrMlCDdsejg4CZE7c= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2 h1:1aeRCnE2CkKYqyzBu0+B2lgTcZPc3ea2lGpijeHbI1c= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2/go.mod h1:GhphxcdlaRyAuBSvo6rV71BvQcvB/vuX8ugCyybuS2k= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= From fecce2565885c0ee87fd6ca1a66a8bc5c0264519 Mon Sep 17 00:00:00 2001 From: Giulio Micheloni Date: Sat, 16 Oct 2021 18:02:03 +0100 Subject: [PATCH 6/7] Separete test file and no stack trace in ret error --- agent/grpc/client_test.go | 46 ----------------------------- agent/grpc/handler.go | 39 +++++++++++++++---------- agent/grpc/handler_test.go | 59 ++++++++++++++++++++++++++++++++++++++ agent/xds/server.go | 24 +++------------- go.mod | 2 +- go.sum | 4 --- 6 files changed, 88 insertions(+), 86 deletions(-) create mode 100644 agent/grpc/handler_test.go diff --git a/agent/grpc/client_test.go b/agent/grpc/client_test.go index 62cfa8d16a..07715924da 100644 --- a/agent/grpc/client_test.go +++ b/agent/grpc/client_test.go @@ -1,7 +1,6 @@ package grpc import ( - "bytes" "context" "fmt" "net" @@ -15,8 +14,6 @@ import ( "github.com/hashicorp/go-hclog" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" "github.com/hashicorp/consul/agent/grpc/internal/testservice" "github.com/hashicorp/consul/agent/grpc/resolver" @@ -444,46 +441,3 @@ func registerWithGRPC(t *testing.T, b *resolver.ServerResolverBuilder) { resolver.Deregister(b.Authority()) }) } - -func TestRecoverMiddleware(t *testing.T) { - // Prepare a logger with output to a buffer - // so we can check what it writes. - var buf bytes.Buffer - - logger := hclog.New(&hclog.LoggerOptions{ - Output: &buf, - }) - - res := resolver.NewServerResolverBuilder(newConfig(t)) - registerWithGRPC(t, res) - - srv := newPanicTestServer(t, logger, "server-1", "dc1", nil) - res.AddServer(srv.Metadata()) - t.Cleanup(srv.shutdown) - - pool := NewClientConnPool(ClientConnPoolConfig{ - Servers: res, - UseTLSForDC: useTLSForDcAlwaysTrue, - DialingFromServer: true, - DialingFromDatacenter: "dc1", - }) - - conn, err := pool.ClientConn("dc1") - require.NoError(t, err) - client := testservice.NewSimpleClient(conn) - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - t.Cleanup(cancel) - - resp, err := client.Something(ctx, &testservice.Req{}) - expectedErr := status.Errorf(codes.Internal, "grpc: panic serving request: panic from Something") - require.Equal(t, expectedErr, err) - require.Nil(t, resp) - - // Read the log - strLog := buf.String() - // Checking the entire stack trace is not possible, let's - // make sure that it contains a couple of expected strings. - require.Contains(t, strLog, `[ERROR] panic serving grpc request: panic="panic from Something`) - require.Contains(t, strLog, `github.com/hashicorp/consul/agent/grpc.(*simplePanic).Something`) -} diff --git a/agent/grpc/handler.go b/agent/grpc/handler.go index 3a48679b00..e7614c4569 100644 --- a/agent/grpc/handler.go +++ b/agent/grpc/handler.go @@ -4,7 +4,6 @@ Package grpc provides a Handler and client for agent gRPC connections. package grpc import ( - "context" "fmt" "net" "time" @@ -14,8 +13,8 @@ import ( "google.golang.org/grpc/keepalive" "google.golang.org/grpc/status" - middleware "github.com/grpc-ecosystem/go-grpc-middleware/v2" - "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/recovery" + middleware "github.com/grpc-ecosystem/go-grpc-middleware" + recovery "github.com/grpc-ecosystem/go-grpc-middleware/recovery" "github.com/hashicorp/go-hclog" ) @@ -23,13 +22,12 @@ import ( // The register function will be called with the grpc.Server to register // gRPC services with the server. func NewHandler(logger Logger, addr net.Addr, register func(server *grpc.Server)) *Handler { - recoveryOpts := []recovery.Option{ - recovery.WithRecoveryHandlerContext(newPanicHandler(logger)), - } metrics := defaultMetrics() - // We don't need to pass tls.Config to the server since it's multiplexed - // behind the RPC listener, which already has TLS configured. - srv := grpc.NewServer( + + recoveryOpts := PanicHandlerMiddlewareOpts(logger) + + opts := []grpc.ServerOption{ + grpc.StatsHandler(newStatsHandler(metrics)), middleware.WithUnaryServerChain( // Add middlware interceptors to recover in case of panics. recovery.UnaryServerInterceptor(recoveryOpts...), @@ -39,21 +37,32 @@ func NewHandler(logger Logger, addr net.Addr, register func(server *grpc.Server) recovery.StreamServerInterceptor(recoveryOpts...), (&activeStreamCounter{metrics: metrics}).Intercept, ), - grpc.StatsHandler(newStatsHandler(metrics)), grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ MinTime: 15 * time.Second, }), - ) + } + + // We don't need to pass tls.Config to the server since it's multiplexed + // behind the RPC listener, which already has TLS configured. + srv := grpc.NewServer(opts...) register(srv) lis := &chanListener{addr: addr, conns: make(chan net.Conn), done: make(chan struct{})} return &Handler{srv: srv, listener: lis} } -// newPanicHandler returns a recovery.RecoveryHandlerFuncContext closure function +// PanicHandlerMiddlewareOpts returns the []recovery.Option containing +// recovery handler function. +func PanicHandlerMiddlewareOpts(logger Logger) []recovery.Option { + return []recovery.Option{ + recovery.WithRecoveryHandler(NewPanicHandler(logger)), + } +} + +// NewPanicHandler returns a recovery.RecoveryHandlerFunc closure function // to handle panic in GRPC server's handlers. -func newPanicHandler(logger Logger) recovery.RecoveryHandlerFuncContext { - return func(ctx context.Context, p interface{}) (err error) { +func NewPanicHandler(logger Logger) recovery.RecoveryHandlerFunc { + return func(p interface{}) (err error) { // Log the panic and the stack trace of the Goroutine that caused the panic. stacktrace := hclog.Stacktrace() logger.Error("panic serving grpc request", @@ -61,7 +70,7 @@ func newPanicHandler(logger Logger) recovery.RecoveryHandlerFuncContext { "stack", stacktrace, ) - return status.Errorf(codes.Internal, "grpc: panic serving request: %v", p) + return status.Errorf(codes.Internal, "grpc: panic serving request") } } diff --git a/agent/grpc/handler_test.go b/agent/grpc/handler_test.go new file mode 100644 index 0000000000..908bed0b10 --- /dev/null +++ b/agent/grpc/handler_test.go @@ -0,0 +1,59 @@ +package grpc + +import ( + "bytes" + "context" + "testing" + "time" + + "github.com/hashicorp/go-hclog" + "github.com/stretchr/testify/require" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + "github.com/hashicorp/consul/agent/grpc/internal/testservice" + "github.com/hashicorp/consul/agent/grpc/resolver" +) + +func TestHandler_PanicRecoveryInterceptor(t *testing.T) { + // Prepare a logger with output to a buffer + // so we can check what it writes. + var buf bytes.Buffer + + logger := hclog.New(&hclog.LoggerOptions{ + Output: &buf, + }) + + res := resolver.NewServerResolverBuilder(newConfig(t)) + registerWithGRPC(t, res) + + srv := newPanicTestServer(t, logger, "server-1", "dc1", nil) + res.AddServer(srv.Metadata()) + t.Cleanup(srv.shutdown) + + pool := NewClientConnPool(ClientConnPoolConfig{ + Servers: res, + UseTLSForDC: useTLSForDcAlwaysTrue, + DialingFromServer: true, + DialingFromDatacenter: "dc1", + }) + + conn, err := pool.ClientConn("dc1") + require.NoError(t, err) + client := testservice.NewSimpleClient(conn) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + t.Cleanup(cancel) + + resp, err := client.Something(ctx, &testservice.Req{}) + expectedErr := status.Errorf(codes.Internal, "grpc: panic serving request") + require.Equal(t, expectedErr, err) + require.Nil(t, resp) + + // Read the log + strLog := buf.String() + // Checking the entire stack trace is not possible, let's + // make sure that it contains a couple of expected strings. + require.Contains(t, strLog, `[ERROR] panic serving grpc request: panic="panic from Something`) + require.Contains(t, strLog, `github.com/hashicorp/consul/agent/grpc.(*simplePanic).Something`) +} diff --git a/agent/xds/server.go b/agent/xds/server.go index ea6cfa2790..205455be40 100644 --- a/agent/xds/server.go +++ b/agent/xds/server.go @@ -10,11 +10,11 @@ import ( envoy_config_core_v3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" envoy_discovery_v2 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v2" envoy_discovery_v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" + middleware "github.com/grpc-ecosystem/go-grpc-middleware" + recovery "github.com/grpc-ecosystem/go-grpc-middleware/recovery" "github.com/armon/go-metrics" "github.com/armon/go-metrics/prometheus" - middleware "github.com/grpc-ecosystem/go-grpc-middleware/v2" - "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/recovery" "github.com/hashicorp/go-hclog" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -23,6 +23,7 @@ import ( "google.golang.org/grpc/status" "github.com/hashicorp/consul/acl" + agentgrpc "github.com/hashicorp/consul/agent/grpc" "github.com/hashicorp/consul/agent/proxycfg" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/logging" @@ -550,27 +551,10 @@ func tokenFromContext(ctx context.Context) string { return "" } -// newPanicHandler returns a recovery.RecoveryHandlerFuncContext closure function -// to handle panic in GRPC server's handlers. -func newPanicHandler(logger hclog.Logger) recovery.RecoveryHandlerFuncContext { - return func(ctx context.Context, p interface{}) (err error) { - // Log the panic and the stack trace of the Goroutine that caused the panic. - stacktrace := hclog.Stacktrace() - logger.Error("panic serving grpc request", - "panic", p, - "stack", stacktrace, - ) - - return status.Errorf(codes.Internal, "grpc: panic serving request: %v", p) - } -} - // NewGRPCServer creates a grpc.Server, registers the Server, and then returns // the grpc.Server. func NewGRPCServer(s *Server, tlsConfigurator *tlsutil.Configurator) *grpc.Server { - recoveryOpts := []recovery.Option{ - recovery.WithRecoveryHandlerContext(newPanicHandler(s.Logger)), - } + recoveryOpts := agentgrpc.PanicHandlerMiddlewareOpts(s.Logger) opts := []grpc.ServerOption{ grpc.MaxConcurrentStreams(2048), diff --git a/go.mod b/go.mod index fea337aa6f..b8c95fb1cd 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/google/gofuzz v1.2.0 github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22 github.com/google/tcpproxy v0.0.0-20180808230851-dfa16c61dad2 - github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2 + github.com/grpc-ecosystem/go-grpc-middleware v1.0.0 github.com/hashicorp/consul/api v1.8.0 github.com/hashicorp/consul/sdk v0.7.0 github.com/hashicorp/go-bexpr v0.1.2 diff --git a/go.sum b/go.sum index dded0ee10f..bb4ef8c894 100644 --- a/go.sum +++ b/go.sum @@ -213,8 +213,6 @@ github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoA github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0 h1:Iju5GlWwrvL6UBg4zJJt3btmonfrMlCDdsejg4CZE7c= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2 h1:1aeRCnE2CkKYqyzBu0+B2lgTcZPc3ea2lGpijeHbI1c= -github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2/go.mod h1:GhphxcdlaRyAuBSvo6rV71BvQcvB/vuX8ugCyybuS2k= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= @@ -408,7 +406,6 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/packethost/packngo v0.1.1-0.20180711074735-b9cb5096f54c h1:vwpFWvAO8DeIZfFeqASzZfsxuWPno9ncAebBEP0N3uE= github.com/packethost/packngo v0.1.1-0.20180711074735-b9cb5096f54c/go.mod h1:otzZQXgoO96RTzDB/Hycg0qZcXZsWJGJRSXbmEIJ+4M= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= @@ -422,7 +419,6 @@ github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI= github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= From a3fb665b882366796f4492ef24f08cf9ef45845b Mon Sep 17 00:00:00 2001 From: Giulio Micheloni Date: Sat, 16 Oct 2021 18:05:32 +0100 Subject: [PATCH 7/7] Restored comment. --- agent/grpc/handler.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/agent/grpc/handler.go b/agent/grpc/handler.go index e7614c4569..886186dd80 100644 --- a/agent/grpc/handler.go +++ b/agent/grpc/handler.go @@ -24,6 +24,8 @@ import ( func NewHandler(logger Logger, addr net.Addr, register func(server *grpc.Server)) *Handler { metrics := defaultMetrics() + // We don't need to pass tls.Config to the server since it's multiplexed + // behind the RPC listener, which already has TLS configured. recoveryOpts := PanicHandlerMiddlewareOpts(logger) opts := []grpc.ServerOption{