server: when the v2 catalog experiment is enabled reject api and rpc requests that are for the v1 catalog (#19129)

When the v2 catalog experiment is enabled the old v1 catalog apis will be
forcibly disabled at both the API (json) layer and the RPC (msgpack) layer.
This will also disable anti-entropy as it uses the v1 api.

This includes all of /v1/catalog/*, /v1/health/*, most of /v1/agent/*,
/v1/config/*, and most of /v1/internal/*.
This commit is contained in:
R.B. Boyer 2023-10-11 10:44:03 -05:00 committed by GitHub
parent ab1e08f1a4
commit b9ab63c55d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 329 additions and 29 deletions

View File

@ -81,8 +81,9 @@ type StateSyncer struct {
SyncChanges *Trigger
// paused stores whether sync runs are temporarily disabled.
pauseLock sync.Mutex
paused int
pauseLock sync.Mutex
paused int
hardDisabled bool
// serverUpInterval is the max time after which a full sync is
// performed when a server has been added to the cluster.
@ -151,9 +152,20 @@ const (
retryFullSyncState fsmState = "retryFullSync"
)
// HardDisableSync is like PauseSync but is one-way. It causes other
// Pause/Resume/Start operations to be completely ignored.
func (s *StateSyncer) HardDisableSync() {
s.pauseLock.Lock()
s.hardDisabled = true
s.pauseLock.Unlock()
}
// Run is the long running method to perform state synchronization
// between local and remote servers.
func (s *StateSyncer) Run() {
if s.Disabled() {
return
}
if s.ClusterSize == nil {
panic("ClusterSize not set")
}
@ -329,7 +341,14 @@ func (s *StateSyncer) Pause() {
func (s *StateSyncer) Paused() bool {
s.pauseLock.Lock()
defer s.pauseLock.Unlock()
return s.paused != 0
return s.paused != 0 || s.hardDisabled
}
// Disabled returns whether sync runs are permanently disabled.
func (s *StateSyncer) Disabled() bool {
s.pauseLock.Lock()
defer s.pauseLock.Unlock()
return s.hardDisabled
}
// Resume re-enables sync runs. It returns true if it was the last pause/resume
@ -340,7 +359,7 @@ func (s *StateSyncer) Resume() bool {
if s.paused < 0 {
panic("unbalanced pause/resume")
}
trigger := s.paused == 0
trigger := s.paused == 0 && !s.hardDisabled
s.pauseLock.Unlock()
if trigger {
s.SyncChanges.Trigger()

View File

@ -22,8 +22,6 @@ import (
"sync/atomic"
"time"
"github.com/hashicorp/consul/lib/stringslice"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/go-connlimit"
@ -623,6 +621,9 @@ func (a *Agent) Start(ctx context.Context) error {
// create the state synchronization manager which performs
// regular and on-demand state synchronizations (anti-entropy).
a.sync = ae.NewStateSyncer(a.State, c.AEInterval, a.shutdownCh, a.logger)
if a.baseDeps.UseV2Resources() {
a.sync.HardDisableSync()
}
err = validateFIPSConfig(a.config)
if err != nil {
@ -724,7 +725,7 @@ func (a *Agent) Start(ctx context.Context) error {
)
var pt *proxytracker.ProxyTracker
if a.useV2Resources() {
if a.baseDeps.UseV2Resources() {
pt = proxyWatcher.(*proxytracker.ProxyTracker)
}
server, err := consul.NewServer(consulCfg, a.baseDeps.Deps, a.externalGRPCServer, incomingRPCLimiter, serverLogger, pt)
@ -911,20 +912,11 @@ func (a *Agent) Failed() <-chan struct{} {
return a.apiServers.failed
}
// useV2Resources returns true if "resource-apis" is present in the Experiments
// array of the agent config.
func (a *Agent) useV2Resources() bool {
if stringslice.Contains(a.baseDeps.Experiments, consul.CatalogResourceExperimentName) {
return true
}
return false
}
// getProxyWatcher returns the proper implementation of the ProxyWatcher interface.
// It will return a ProxyTracker if "resource-apis" experiment is active. Otherwise,
// it will return a ConfigSource.
func (a *Agent) getProxyWatcher() xds.ProxyWatcher {
if a.useV2Resources() {
if a.baseDeps.UseV2Resources() {
a.logger.Trace("returning proxyTracker for getProxyWatcher")
return proxytracker.NewProxyTracker(proxytracker.ProxyTrackerConfig{
Logger: a.logger.Named("proxy-tracker"),

View File

@ -79,6 +79,34 @@ func createACLTokenWithAgentReadPolicy(t *testing.T, srv *HTTPHandlers) string {
return svcToken.SecretID
}
func TestAgentEndpointsFailInV2(t *testing.T) {
t.Parallel()
a := NewTestAgent(t, `experiments = ["resource-apis"]`)
checkRequest := func(method, url string) {
t.Run(method+" "+url, func(t *testing.T) {
assertV1CatalogEndpointDoesNotWorkWithV2(t, a, method, url, `{}`)
})
}
checkRequest("PUT", "/v1/agent/maintenance")
checkRequest("GET", "/v1/agent/services")
checkRequest("GET", "/v1/agent/service/web")
checkRequest("GET", "/v1/agent/checks")
checkRequest("GET", "/v1/agent/health/service/id/web")
checkRequest("GET", "/v1/agent/health/service/name/web")
checkRequest("PUT", "/v1/agent/check/register")
checkRequest("PUT", "/v1/agent/check/deregister/web")
checkRequest("PUT", "/v1/agent/check/pass/web")
checkRequest("PUT", "/v1/agent/check/warn/web")
checkRequest("PUT", "/v1/agent/check/fail/web")
checkRequest("PUT", "/v1/agent/check/update/web")
checkRequest("PUT", "/v1/agent/service/register")
checkRequest("PUT", "/v1/agent/service/deregister/web")
checkRequest("PUT", "/v1/agent/service/maintenance/web")
}
func TestAgent_Services(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")

View File

@ -6,24 +6,68 @@ package agent
import (
"context"
"fmt"
"io"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"
"time"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/serf/coordinate"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/hashicorp/consul/testrpc"
)
func TestCatalogEndpointsFailInV2(t *testing.T) {
t.Parallel()
a := NewTestAgent(t, `experiments = ["resource-apis"]`)
checkRequest := func(method, url string) {
t.Run(method+" "+url, func(t *testing.T) {
assertV1CatalogEndpointDoesNotWorkWithV2(t, a, method, url, "{}")
})
}
checkRequest("PUT", "/v1/catalog/register")
checkRequest("GET", "/v1/catalog/connect/")
checkRequest("PUT", "/v1/catalog/deregister")
checkRequest("GET", "/v1/catalog/datacenters")
checkRequest("GET", "/v1/catalog/nodes")
checkRequest("GET", "/v1/catalog/services")
checkRequest("GET", "/v1/catalog/service/")
checkRequest("GET", "/v1/catalog/node/")
checkRequest("GET", "/v1/catalog/node-services/")
checkRequest("GET", "/v1/catalog/gateway-services/")
}
func assertV1CatalogEndpointDoesNotWorkWithV2(t *testing.T, a *TestAgent, method, url string, requestBody string) {
var body io.Reader
switch method {
case http.MethodPost, http.MethodPut:
body = strings.NewReader(requestBody + "\n")
}
req, err := http.NewRequest(method, url, body)
require.NoError(t, err)
resp := httptest.NewRecorder()
a.srv.h.ServeHTTP(resp, req)
require.Equal(t, http.StatusBadRequest, resp.Code)
got, err := io.ReadAll(resp.Body)
require.NoError(t, err)
require.Contains(t, string(got), structs.ErrUsingV2CatalogExperiment.Error())
}
func TestCatalogRegister_PeeringRegistration(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")

View File

@ -19,6 +19,23 @@ import (
"github.com/hashicorp/consul/testrpc"
)
func TestConfigEndpointsFailInV2(t *testing.T) {
t.Parallel()
a := NewTestAgent(t, `experiments = ["resource-apis"]`)
checkRequest := func(method, url string) {
t.Run(method+" "+url, func(t *testing.T) {
assertV1CatalogEndpointDoesNotWorkWithV2(t, a, method, url, `{"kind":"service-defaults", "name":"web"}`)
})
}
checkRequest("GET", "/v1/config/service-defaults")
checkRequest("GET", "/v1/config/service-defaults/web")
checkRequest("DELETE", "/v1/config/service-defaults/web")
checkRequest("PUT", "/v1/config")
}
func TestConfig_Get(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")

View File

@ -4,6 +4,7 @@
package consul
import (
"github.com/hashicorp/consul/lib/stringslice"
"google.golang.org/grpc"
"github.com/hashicorp/consul-net-rpc/net/rpc"
@ -48,6 +49,15 @@ type Deps struct {
EnterpriseDeps
}
// useV2Resources returns true if "resource-apis" is present in the Experiments
// array of the agent config.
func (d Deps) UseV2Resources() bool {
if stringslice.Contains(d.Experiments, CatalogResourceExperimentName) {
return true
}
return false
}
type GRPCClientConner interface {
ClientConn(datacenter string) (*grpc.ClientConn, error)
ClientConnLeader() (*grpc.ClientConn, error)

View File

@ -19,12 +19,7 @@ import (
"sync/atomic"
"time"
"github.com/hashicorp/consul/internal/auth"
"github.com/hashicorp/consul/internal/mesh"
"github.com/hashicorp/consul/internal/resource"
"github.com/armon/go-metrics"
"github.com/hashicorp/consul-net-rpc/net/rpc"
"github.com/hashicorp/go-connlimit"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
@ -41,6 +36,7 @@ import (
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/reflection"
"github.com/hashicorp/consul-net-rpc/net/rpc"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/acl/resolver"
"github.com/hashicorp/consul/agent/blockingquery"
@ -74,9 +70,12 @@ import (
"github.com/hashicorp/consul/agent/rpc/peering"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/internal/auth"
"github.com/hashicorp/consul/internal/catalog"
"github.com/hashicorp/consul/internal/controller"
"github.com/hashicorp/consul/internal/mesh"
proxysnapshot "github.com/hashicorp/consul/internal/mesh/proxy-snapshot"
"github.com/hashicorp/consul/internal/resource"
"github.com/hashicorp/consul/internal/resource/demo"
"github.com/hashicorp/consul/internal/resource/reaper"
raftstorage "github.com/hashicorp/consul/internal/storage/raft"
@ -466,6 +465,8 @@ type Server struct {
reportingManager *reporting.ReportingManager
registry resource.Registry
useV2Resources bool
}
func (s *Server) DecrementBlockingQueries() uint64 {
@ -554,6 +555,7 @@ func NewServer(config *Config, flat Deps, externalGRPCServer *grpc.Server,
incomingRPCLimiter: incomingRPCLimiter,
routineManager: routine.NewManager(logger.Named(logging.ConsulServer)),
registry: flat.Registry,
useV2Resources: flat.UseV2Resources(),
}
incomingRPCLimiter.Register(s)
@ -589,7 +591,17 @@ func NewServer(config *Config, flat Deps, externalGRPCServer *grpc.Server,
}
rpcServerOpts := []func(*rpc.Server){
rpc.WithPreBodyInterceptor(middleware.GetNetRPCRateLimitingInterceptor(s.incomingRPCLimiter, middleware.NewPanicHandler(s.logger))),
rpc.WithPreBodyInterceptor(
middleware.ChainedRPCPreBodyInterceptor(
func(reqServiceMethod string, sourceAddr net.Addr) error {
if s.useV2Resources && isV1CatalogRequest(reqServiceMethod) {
return structs.ErrUsingV2CatalogExperiment
}
return nil
},
middleware.GetNetRPCRateLimitingInterceptor(s.incomingRPCLimiter, middleware.NewPanicHandler(s.logger)),
),
),
}
if flat.GetNetRPCInterceptorFunc != nil {
@ -898,8 +910,27 @@ func NewServer(config *Config, flat Deps, externalGRPCServer *grpc.Server,
return s, nil
}
func isV1CatalogRequest(rpcName string) bool {
switch {
case strings.HasPrefix(rpcName, "Catalog."),
strings.HasPrefix(rpcName, "Health."),
strings.HasPrefix(rpcName, "ConfigEntry."):
return true
}
switch rpcName {
case "Internal.EventFire", "Internal.KeyringOperation", "Internal.OIDCAuthMethods":
return false
default:
if strings.HasPrefix(rpcName, "Internal.") {
return true
}
return false
}
}
func (s *Server) registerControllers(deps Deps, proxyUpdater ProxyUpdater) error {
if stringslice.Contains(deps.Experiments, CatalogResourceExperimentName) {
if s.useV2Resources {
catalog.RegisterControllers(s.controllerManager, catalog.DefaultControllerDependencies())
defaultAllow, err := s.config.ACLResolverSettings.IsDefaultAllow()

View File

@ -28,6 +28,25 @@ import (
"github.com/hashicorp/consul/types"
)
func TestHealthEndpointsFailInV2(t *testing.T) {
t.Parallel()
a := NewTestAgent(t, `experiments = ["resource-apis"]`)
checkRequest := func(method, url string) {
t.Run(method+" "+url, func(t *testing.T) {
assertV1CatalogEndpointDoesNotWorkWithV2(t, a, method, url, "{}")
})
}
checkRequest("GET", "/v1/health/node/web")
checkRequest("GET", "/v1/health/checks/web")
checkRequest("GET", "/v1/health/state/web")
checkRequest("GET", "/v1/health/service/web")
checkRequest("GET", "/v1/health/connect/web")
checkRequest("GET", "/v1/health/ingress/web")
}
func TestHealthChecksInState(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")

View File

@ -394,6 +394,11 @@ func (s *HTTPHandlers) wrap(handler endpoint, methods []string) http.HandlerFunc
}
logURL = aclEndpointRE.ReplaceAllString(logURL, "$1<hidden>$4")
rejectCatalogV1Endpoint := false
if s.agent.baseDeps.UseV2Resources() {
rejectCatalogV1Endpoint = isV1CatalogRequest(logURL)
}
if s.denylist.Block(req.URL.Path) {
errMsg := "Endpoint is blocked by agent configuration"
httpLogger.Error("Request error",
@ -455,6 +460,14 @@ func (s *HTTPHandlers) wrap(handler endpoint, methods []string) http.HandlerFunc
return strings.Contains(err.Error(), rate.ErrRetryLater.Error())
}
isUsingV2CatalogExperiment := func(err error) bool {
if err == nil {
return false
}
return structs.IsErrUsingV2CatalogExperiment(err)
}
isMethodNotAllowed := func(err error) bool {
_, ok := err.(MethodNotAllowedError)
return ok
@ -490,6 +503,10 @@ func (s *HTTPHandlers) wrap(handler endpoint, methods []string) http.HandlerFunc
msg = s.Message()
}
if isUsingV2CatalogExperiment(err) && !isHTTPError(err) {
err = newRejectV1RequestWhenV2EnabledError()
}
switch {
case isForbidden(err):
resp.WriteHeader(http.StatusForbidden)
@ -566,7 +583,12 @@ func (s *HTTPHandlers) wrap(handler endpoint, methods []string) http.HandlerFunc
if err == nil {
// Invoke the handler
obj, err = handler(resp, req)
if rejectCatalogV1Endpoint {
obj = nil
err = s.rejectV1RequestWhenV2Enabled()
} else {
obj, err = handler(resp, req)
}
}
}
contentType := "application/json"
@ -608,6 +630,46 @@ func (s *HTTPHandlers) wrap(handler endpoint, methods []string) http.HandlerFunc
}
}
func isV1CatalogRequest(logURL string) bool {
switch {
case strings.HasPrefix(logURL, "/v1/catalog/"),
strings.HasPrefix(logURL, "/v1/health/"),
strings.HasPrefix(logURL, "/v1/config/"):
return true
case strings.HasPrefix(logURL, "/v1/agent/token/"),
logURL == "/v1/agent/self",
logURL == "/v1/agent/host",
logURL == "/v1/agent/version",
logURL == "/v1/agent/reload",
logURL == "/v1/agent/monitor",
logURL == "/v1/agent/metrics",
logURL == "/v1/agent/metrics/stream",
logURL == "/v1/agent/members",
strings.HasPrefix(logURL, "/v1/agent/join/"),
logURL == "/v1/agent/leave",
strings.HasPrefix(logURL, "/v1/agent/force-leave/"),
logURL == "/v1/agent/connect/authorize",
logURL == "/v1/agent/connect/ca/roots",
strings.HasPrefix(logURL, "/v1/agent/connect/ca/leaf/"):
return false
case strings.HasPrefix(logURL, "/v1/agent/"):
return true
case logURL == "/v1/internal/acl/authorize",
logURL == "/v1/internal/service-virtual-ip",
logURL == "/v1/internal/ui/oidc-auth-methods",
strings.HasPrefix(logURL, "/v1/internal/ui/metrics-proxy/"):
return false
case strings.HasPrefix(logURL, "/v1/internal/"):
return true
default:
return false
}
}
// marshalJSON marshals the object into JSON, respecting the user's pretty-ness
// configuration.
func (s *HTTPHandlers) marshalJSON(req *http.Request, obj interface{}) ([]byte, error) {
@ -1084,6 +1146,20 @@ func (s *HTTPHandlers) parseToken(req *http.Request, token *string) {
s.parseTokenWithDefault(req, token)
}
func (s *HTTPHandlers) rejectV1RequestWhenV2Enabled() error {
if s.agent.baseDeps.UseV2Resources() {
return newRejectV1RequestWhenV2EnabledError()
}
return nil
}
func newRejectV1RequestWhenV2EnabledError() error {
return HTTPError{
StatusCode: http.StatusBadRequest,
Reason: structs.ErrUsingV2CatalogExperiment.Error(),
}
}
func sourceAddrFromRequest(req *http.Request) string {
xff := req.Header.Get("X-Forwarded-For")
forwardHosts := strings.Split(xff, ",")

View File

@ -12,9 +12,10 @@ import (
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/consul-net-rpc/net/rpc"
rpcRate "github.com/hashicorp/consul/agent/consul/rate"
"github.com/hashicorp/go-hclog"
)
// RPCTypeInternal identifies the "RPC" request as coming from some internal
@ -25,9 +26,11 @@ import (
// Really what we are measuring here is a "cluster operation". The term we have
// used for this historically is "RPC", so we continue to use that here.
const RPCTypeInternal = "internal"
const RPCTypeNetRPC = "net/rpc"
var metricRPCRequest = []string{"rpc", "server", "call"}
var requestLogName = strings.Join(metricRPCRequest, "_")
var OneTwelveRPCSummary = []prometheus.SummaryDefinition{
@ -186,3 +189,20 @@ func GetNetRPCRateLimitingInterceptor(requestLimitsHandler rpcRate.RequestLimits
return requestLimitsHandler.Allow(op)
}
}
func ChainedRPCPreBodyInterceptor(chain ...rpc.PreBodyInterceptor) rpc.PreBodyInterceptor {
if len(chain) == 0 {
panic("don't call this with zero interceptors")
}
if len(chain) == 1 {
return chain[0]
}
return func(reqServiceMethod string, sourceAddr net.Addr) error {
for _, interceptor := range chain {
if err := interceptor(reqServiceMethod, sourceAddr); err != nil {
return err
}
}
return nil
}
}

View File

@ -23,6 +23,7 @@ const (
errRateLimited = "Rate limit reached, try again later" // Note: we depend on this error message in the gRPC ConnectCA.Sign endpoint (see: isRateLimitError).
errNotPrimaryDatacenter = "not the primary datacenter"
errStateReadOnly = "CA Provider State is read-only"
errUsingV2CatalogExperiment = "V1 catalog is disabled when V2 is enabled"
)
var (
@ -39,6 +40,7 @@ var (
ErrRateLimited = errors.New(errRateLimited) // Note: we depend on this error message in the gRPC ConnectCA.Sign endpoint (see: isRateLimitError).
ErrNotPrimaryDatacenter = errors.New(errNotPrimaryDatacenter)
ErrStateReadOnly = errors.New(errStateReadOnly)
ErrUsingV2CatalogExperiment = errors.New(errUsingV2CatalogExperiment)
)
func IsErrNoDCPath(err error) bool {
@ -60,3 +62,7 @@ func IsErrRPCRateExceeded(err error) bool {
func IsErrServiceNotFound(err error) bool {
return err != nil && strings.Contains(err.Error(), errServiceNotFound)
}
func IsErrUsingV2CatalogExperiment(err error) bool {
return err != nil && strings.Contains(err.Error(), errUsingV2CatalogExperiment)
}

View File

@ -286,6 +286,22 @@ func (a *TestAgent) waitForUp() error {
continue // fail, try again
}
if a.Config.Bootstrap && a.Config.ServerMode {
if a.baseDeps.UseV2Resources() {
args := structs.DCSpecificRequest{
Datacenter: "dc1",
}
var leader string
if err := a.RPC(context.Background(), "Status.Leader", args, &leader); err != nil {
retErr = fmt.Errorf("Status.Leader failed: %v", err)
continue // fail, try again
}
if leader == "" {
retErr = fmt.Errorf("No leader")
continue // fail, try again
}
return nil // success
}
// Ensure we have a leader and a node registration.
args := &structs.DCSpecificRequest{
Datacenter: a.Config.Datacenter,

View File

@ -32,6 +32,28 @@ import (
"github.com/hashicorp/consul/types"
)
func TestUIEndpointsFailInV2(t *testing.T) {
t.Parallel()
a := NewTestAgent(t, `experiments = ["resource-apis"]`)
checkRequest := func(method, url string) {
t.Run(method+" "+url, func(t *testing.T) {
assertV1CatalogEndpointDoesNotWorkWithV2(t, a, method, url, "{}")
})
}
checkRequest("GET", "/v1/internal/ui/nodes")
checkRequest("GET", "/v1/internal/ui/node/web")
checkRequest("GET", "/v1/internal/ui/services")
checkRequest("GET", "/v1/internal/ui/exported-services")
checkRequest("GET", "/v1/internal/ui/catalog-overview")
checkRequest("GET", "/v1/internal/ui/gateway-services-nodes/web")
checkRequest("GET", "/v1/internal/ui/gateway-intentions/web")
checkRequest("GET", "/v1/internal/ui/service-topology/web")
checkRequest("PUT", "/v1/internal/service-virtual-ip")
}
func TestUIIndex(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")