2023-03-28 19:39:22 +01:00
|
|
|
// Copyright (c) HashiCorp, Inc.
|
2023-08-11 09:12:13 -04:00
|
|
|
// SPDX-License-Identifier: BUSL-1.1
|
2023-03-28 19:39:22 +01:00
|
|
|
|
2022-07-08 12:01:13 -05:00
|
|
|
package peerstream
|
2022-05-19 14:21:29 -05:00
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"google.golang.org/genproto/googleapis/rpc/code"
|
2023-01-11 09:39:10 -05:00
|
|
|
"google.golang.org/protobuf/proto"
|
2022-07-15 15:03:40 -05:00
|
|
|
newproto "google.golang.org/protobuf/proto"
|
2022-05-19 14:21:29 -05:00
|
|
|
"google.golang.org/protobuf/types/known/anypb"
|
|
|
|
|
2023-05-08 13:13:25 -05:00
|
|
|
"github.com/hashicorp/consul/acl"
|
2022-05-19 14:21:29 -05:00
|
|
|
"github.com/hashicorp/consul/agent/cache"
|
2022-08-22 10:22:11 -04:00
|
|
|
"github.com/hashicorp/consul/agent/consul/state"
|
2022-05-19 14:21:29 -05:00
|
|
|
"github.com/hashicorp/consul/agent/structs"
|
2023-02-17 16:14:46 -05:00
|
|
|
"github.com/hashicorp/consul/proto/private/pbpeering"
|
|
|
|
"github.com/hashicorp/consul/proto/private/pbpeerstream"
|
|
|
|
"github.com/hashicorp/consul/proto/private/pbservice"
|
|
|
|
"github.com/hashicorp/consul/proto/private/pbstatus"
|
2022-07-08 12:01:13 -05:00
|
|
|
"github.com/hashicorp/consul/types"
|
2022-05-19 14:21:29 -05:00
|
|
|
)
|
|
|
|
|
2022-05-19 16:37:52 -05:00
|
|
|
/*
|
|
|
|
TODO(peering):
|
|
|
|
|
|
|
|
Then if we ever fail to apply a replication message we should either tear
|
|
|
|
down the entire connection (and thus force a resync on reconnect) or
|
|
|
|
request a resync operation.
|
|
|
|
*/
|
|
|
|
|
2022-09-29 15:37:19 -04:00
|
|
|
// makeExportedServiceListResponse handles preparing exported service list updates to the peer cluster.
|
|
|
|
// Each cache.UpdateEvent will contain all exported services.
|
|
|
|
func makeExportedServiceListResponse(
|
|
|
|
mst *MutableStatus,
|
|
|
|
update cache.UpdateEvent,
|
|
|
|
) (*pbpeerstream.ReplicationMessage_Response, error) {
|
|
|
|
exportedService, ok := update.Result.(*pbpeerstream.ExportedServiceList)
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("invalid type for exported service list response: %T", update.Result)
|
|
|
|
}
|
|
|
|
|
|
|
|
any, _, err := marshalToProtoAny[*pbpeerstream.ExportedServiceList](exportedService)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to marshal: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var serviceNames []structs.ServiceName
|
|
|
|
for _, serviceName := range exportedService.Services {
|
|
|
|
sn := structs.ServiceNameFromString(serviceName)
|
|
|
|
serviceNames = append(serviceNames, sn)
|
|
|
|
}
|
|
|
|
|
|
|
|
mst.SetExportedServices(serviceNames)
|
|
|
|
|
|
|
|
return &pbpeerstream.ReplicationMessage_Response{
|
|
|
|
ResourceURL: pbpeerstream.TypeURLExportedServiceList,
|
2022-10-11 19:02:04 -06:00
|
|
|
ResourceID: subExportedServiceList,
|
|
|
|
Operation: pbpeerstream.Operation_OPERATION_UPSERT,
|
|
|
|
Resource: any,
|
2022-09-29 15:37:19 -04:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2022-05-26 15:24:09 -04:00
|
|
|
// makeServiceResponse handles preparing exported service instance updates to the peer cluster.
|
2022-05-19 14:21:29 -05:00
|
|
|
// Each cache.UpdateEvent will contain all instances for a service name.
|
|
|
|
// If there are no instances in the event, we consider that to be a de-registration.
|
2022-10-24 11:48:02 -05:00
|
|
|
func makeServiceResponse(update cache.UpdateEvent) (*pbpeerstream.ReplicationMessage_Response, error) {
|
2022-07-18 10:20:04 -07:00
|
|
|
serviceName := strings.TrimPrefix(update.CorrelationID, subExportedService)
|
2022-07-15 15:03:40 -05:00
|
|
|
csn, ok := update.Result.(*pbservice.IndexedCheckServiceNodes)
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("invalid type for service response: %T", update.Result)
|
|
|
|
}
|
|
|
|
|
|
|
|
export := &pbpeerstream.ExportedService{
|
|
|
|
Nodes: csn.Nodes,
|
|
|
|
}
|
|
|
|
|
|
|
|
any, err := anypb.New(export)
|
2022-05-26 15:24:09 -04:00
|
|
|
if err != nil {
|
2022-07-13 10:00:35 -05:00
|
|
|
return nil, fmt.Errorf("failed to marshal: %w", err)
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
2022-07-18 10:20:04 -07:00
|
|
|
|
2022-07-13 10:00:35 -05:00
|
|
|
return &pbpeerstream.ReplicationMessage_Response{
|
2022-07-15 15:03:40 -05:00
|
|
|
ResourceURL: pbpeerstream.TypeURLExportedService,
|
2022-10-11 19:02:04 -06:00
|
|
|
ResourceID: serviceName,
|
|
|
|
Operation: pbpeerstream.Operation_OPERATION_UPSERT,
|
|
|
|
Resource: any,
|
2022-07-13 10:00:35 -05:00
|
|
|
}, nil
|
2022-05-26 15:24:09 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
func makeCARootsResponse(
|
|
|
|
update cache.UpdateEvent,
|
2022-07-13 10:00:35 -05:00
|
|
|
) (*pbpeerstream.ReplicationMessage_Response, error) {
|
2022-05-26 15:24:09 -04:00
|
|
|
any, _, err := marshalToProtoAny[*pbpeering.PeeringTrustBundle](update.Result)
|
2022-05-19 14:21:29 -05:00
|
|
|
if err != nil {
|
2022-07-13 10:00:35 -05:00
|
|
|
return nil, fmt.Errorf("failed to marshal: %w", err)
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
2022-05-26 15:24:09 -04:00
|
|
|
|
2022-07-13 10:00:35 -05:00
|
|
|
return &pbpeerstream.ReplicationMessage_Response{
|
2022-07-15 15:03:40 -05:00
|
|
|
ResourceURL: pbpeerstream.TypeURLPeeringTrustBundle,
|
2022-10-11 19:02:04 -06:00
|
|
|
ResourceID: "roots",
|
|
|
|
Operation: pbpeerstream.Operation_OPERATION_UPSERT,
|
|
|
|
Resource: any,
|
2022-07-13 10:00:35 -05:00
|
|
|
}, nil
|
2022-05-26 15:24:09 -04:00
|
|
|
}
|
|
|
|
|
2022-08-22 10:21:20 -04:00
|
|
|
func makeServerAddrsResponse(
|
|
|
|
update cache.UpdateEvent,
|
|
|
|
) (*pbpeerstream.ReplicationMessage_Response, error) {
|
|
|
|
any, _, err := marshalToProtoAny[*pbpeering.PeeringServerAddresses](update.Result)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to marshal: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return &pbpeerstream.ReplicationMessage_Response{
|
|
|
|
ResourceURL: pbpeerstream.TypeURLPeeringServerAddresses,
|
2022-10-11 19:02:04 -06:00
|
|
|
ResourceID: "server-addrs",
|
|
|
|
Operation: pbpeerstream.Operation_OPERATION_UPSERT,
|
|
|
|
Resource: any,
|
2022-08-22 10:21:20 -04:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2022-05-26 15:24:09 -04:00
|
|
|
// marshalToProtoAny takes any input and returns:
|
|
|
|
// the protobuf.Any type, the asserted T type, and any errors
|
|
|
|
// during marshalling or type assertion.
|
|
|
|
// `in` MUST be of type T or it returns an error.
|
2022-07-15 15:03:40 -05:00
|
|
|
func marshalToProtoAny[T newproto.Message](in any) (*anypb.Any, T, error) {
|
2022-05-26 15:24:09 -04:00
|
|
|
typ, ok := in.(T)
|
|
|
|
if !ok {
|
|
|
|
var outType T
|
|
|
|
return nil, typ, fmt.Errorf("input type is not %T: %T", outType, in)
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
2022-07-15 15:03:40 -05:00
|
|
|
any, err := anypb.New(typ)
|
2022-05-26 15:24:09 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, typ, err
|
|
|
|
}
|
|
|
|
return any, typ, nil
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
|
|
|
|
2022-07-08 12:01:13 -05:00
|
|
|
func (s *Server) processResponse(
|
2022-05-26 15:24:09 -04:00
|
|
|
peerName string,
|
|
|
|
partition string,
|
2022-07-15 10:20:43 -07:00
|
|
|
mutableStatus *MutableStatus,
|
2022-07-08 12:01:13 -05:00
|
|
|
resp *pbpeerstream.ReplicationMessage_Response,
|
|
|
|
) (*pbpeerstream.ReplicationMessage, error) {
|
|
|
|
if !pbpeerstream.KnownTypeURL(resp.ResourceURL) {
|
2022-05-19 16:37:52 -05:00
|
|
|
err := fmt.Errorf("received response for unknown resource type %q", resp.ResourceURL)
|
2022-07-13 10:00:35 -05:00
|
|
|
return makeNACKReply(
|
2022-05-19 16:37:52 -05:00
|
|
|
resp.ResourceURL,
|
|
|
|
resp.Nonce,
|
|
|
|
code.Code_INVALID_ARGUMENT,
|
|
|
|
err.Error(),
|
|
|
|
), err
|
2022-10-11 19:02:04 -06:00
|
|
|
}
|
|
|
|
if resp.Nonce == "" {
|
|
|
|
err := fmt.Errorf("received response without a nonce for: %s:%s", resp.ResourceURL, resp.ResourceID)
|
|
|
|
return makeNACKReply(
|
|
|
|
resp.ResourceURL,
|
|
|
|
resp.Nonce,
|
|
|
|
code.Code_INVALID_ARGUMENT,
|
|
|
|
err.Error(),
|
|
|
|
), err
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
switch resp.Operation {
|
2022-07-08 12:01:13 -05:00
|
|
|
case pbpeerstream.Operation_OPERATION_UPSERT:
|
2022-05-19 14:21:29 -05:00
|
|
|
if resp.Resource == nil {
|
2022-05-19 16:37:52 -05:00
|
|
|
err := fmt.Errorf("received upsert response with no content")
|
2022-07-13 10:00:35 -05:00
|
|
|
return makeNACKReply(
|
2022-05-19 16:37:52 -05:00
|
|
|
resp.ResourceURL,
|
|
|
|
resp.Nonce,
|
|
|
|
code.Code_INVALID_ARGUMENT,
|
|
|
|
err.Error(),
|
|
|
|
), err
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
2022-05-19 16:37:52 -05:00
|
|
|
|
2022-08-22 10:09:47 -04:00
|
|
|
if err := s.handleUpsert(peerName, partition, mutableStatus, resp.ResourceURL, resp.ResourceID, resp.Resource); err != nil {
|
2022-07-13 10:00:35 -05:00
|
|
|
return makeNACKReply(
|
2022-05-19 16:37:52 -05:00
|
|
|
resp.ResourceURL,
|
|
|
|
resp.Nonce,
|
|
|
|
code.Code_INTERNAL,
|
|
|
|
fmt.Sprintf("upsert error, ResourceURL: %q, ResourceID: %q: %v", resp.ResourceURL, resp.ResourceID, err),
|
|
|
|
), fmt.Errorf("upsert error: %w", err)
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
|
|
|
|
2022-07-13 10:00:35 -05:00
|
|
|
return makeACKReply(resp.ResourceURL, resp.Nonce), nil
|
2022-05-19 16:37:52 -05:00
|
|
|
|
2022-05-19 14:21:29 -05:00
|
|
|
default:
|
2022-05-19 16:37:52 -05:00
|
|
|
var errMsg string
|
2022-07-08 12:01:13 -05:00
|
|
|
if op := pbpeerstream.Operation_name[int32(resp.Operation)]; op != "" {
|
2022-05-19 16:37:52 -05:00
|
|
|
errMsg = fmt.Sprintf("unsupported operation: %q", op)
|
|
|
|
} else {
|
|
|
|
errMsg = fmt.Sprintf("unsupported operation: %d", resp.Operation)
|
|
|
|
}
|
2022-07-13 10:00:35 -05:00
|
|
|
return makeNACKReply(
|
2022-05-19 16:37:52 -05:00
|
|
|
resp.ResourceURL,
|
|
|
|
resp.Nonce,
|
|
|
|
code.Code_INVALID_ARGUMENT,
|
|
|
|
errMsg,
|
|
|
|
), errors.New(errMsg)
|
|
|
|
}
|
|
|
|
}
|
2022-05-19 14:21:29 -05:00
|
|
|
|
2022-07-08 12:01:13 -05:00
|
|
|
func (s *Server) handleUpsert(
|
2022-05-19 16:37:52 -05:00
|
|
|
peerName string,
|
|
|
|
partition string,
|
2022-07-15 10:20:43 -07:00
|
|
|
mutableStatus *MutableStatus,
|
2022-05-19 16:37:52 -05:00
|
|
|
resourceURL string,
|
|
|
|
resourceID string,
|
|
|
|
resource *anypb.Any,
|
|
|
|
) error {
|
2022-07-15 15:03:40 -05:00
|
|
|
if resource.TypeUrl != resourceURL {
|
|
|
|
return fmt.Errorf("mismatched resourceURL %q and Any typeUrl %q", resourceURL, resource.TypeUrl)
|
|
|
|
}
|
|
|
|
|
2022-05-19 16:37:52 -05:00
|
|
|
switch resourceURL {
|
2022-09-29 15:37:19 -04:00
|
|
|
case pbpeerstream.TypeURLExportedServiceList:
|
|
|
|
export := &pbpeerstream.ExportedServiceList{}
|
|
|
|
if err := resource.UnmarshalTo(export); err != nil {
|
|
|
|
return fmt.Errorf("failed to unmarshal resource: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
err := s.handleUpsertExportedServiceList(mutableStatus, peerName, partition, export)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("did not update imported services based on the exported service list event: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2022-07-15 15:03:40 -05:00
|
|
|
case pbpeerstream.TypeURLExportedService:
|
2022-05-19 16:37:52 -05:00
|
|
|
sn := structs.ServiceNameFromString(resourceID)
|
|
|
|
sn.OverridePartition(partition)
|
|
|
|
|
2022-07-15 15:03:40 -05:00
|
|
|
export := &pbpeerstream.ExportedService{}
|
|
|
|
if err := resource.UnmarshalTo(export); err != nil {
|
2022-05-19 16:37:52 -05:00
|
|
|
return fmt.Errorf("failed to unmarshal resource: %w", err)
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
|
|
|
|
2022-07-15 15:03:40 -05:00
|
|
|
err := s.handleUpdateService(peerName, partition, sn, export)
|
2022-07-15 10:20:43 -07:00
|
|
|
if err != nil {
|
2022-07-15 15:03:40 -05:00
|
|
|
return fmt.Errorf("did not increment imported services count for service=%q: %w", sn.String(), err)
|
2022-07-15 10:20:43 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2022-05-26 15:24:09 -04:00
|
|
|
|
2022-07-15 15:03:40 -05:00
|
|
|
case pbpeerstream.TypeURLPeeringTrustBundle:
|
2022-05-26 15:24:09 -04:00
|
|
|
roots := &pbpeering.PeeringTrustBundle{}
|
2022-07-15 15:03:40 -05:00
|
|
|
if err := resource.UnmarshalTo(roots); err != nil {
|
2022-05-26 15:24:09 -04:00
|
|
|
return fmt.Errorf("failed to unmarshal resource: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return s.handleUpsertRoots(peerName, partition, roots)
|
|
|
|
|
2022-08-22 10:21:20 -04:00
|
|
|
case pbpeerstream.TypeURLPeeringServerAddresses:
|
|
|
|
addrs := &pbpeering.PeeringServerAddresses{}
|
|
|
|
if err := resource.UnmarshalTo(addrs); err != nil {
|
|
|
|
return fmt.Errorf("failed to unmarshal resource: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return s.handleUpsertServerAddrs(peerName, partition, addrs)
|
2022-05-19 16:37:52 -05:00
|
|
|
default:
|
|
|
|
return fmt.Errorf("unexpected resourceURL: %s", resourceURL)
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-09-29 15:37:19 -04:00
|
|
|
func (s *Server) handleUpsertExportedServiceList(
|
|
|
|
mutableStatus *MutableStatus,
|
|
|
|
peerName string,
|
|
|
|
partition string,
|
|
|
|
export *pbpeerstream.ExportedServiceList,
|
|
|
|
) error {
|
|
|
|
exportedServices := make(map[structs.ServiceName]struct{})
|
|
|
|
var serviceNames []structs.ServiceName
|
|
|
|
for _, service := range export.Services {
|
|
|
|
sn := structs.ServiceNameFromString(service)
|
|
|
|
sn.OverridePartition(partition)
|
|
|
|
|
|
|
|
// This ensures that we don't delete exported service's sidecars below.
|
|
|
|
snSidecarProxy := structs.ServiceNameFromString(service + syntheticProxyNameSuffix)
|
|
|
|
snSidecarProxy.OverridePartition(partition)
|
|
|
|
|
|
|
|
exportedServices[sn] = struct{}{}
|
|
|
|
exportedServices[snSidecarProxy] = struct{}{}
|
|
|
|
serviceNames = append(serviceNames, sn)
|
|
|
|
}
|
|
|
|
|
2023-05-24 16:32:45 -05:00
|
|
|
// Ensure we query services from all namespaces in this partition when we perform
|
|
|
|
// this query or else we may not propagate updates / deletes correctly.
|
|
|
|
entMeta := acl.NewEnterpriseMetaWithPartition(partition, acl.WildcardName)
|
|
|
|
_, serviceList, err := s.GetStore().ServiceList(nil, &entMeta, peerName)
|
2022-09-29 15:37:19 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
for _, sn := range serviceList {
|
|
|
|
if _, ok := exportedServices[sn]; !ok {
|
|
|
|
err := s.handleUpdateService(peerName, partition, sn, nil)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to delete unexported service: %w", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
mutableStatus.SetImportedServices(serviceNames)
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-06-13 11:52:28 -06:00
|
|
|
// handleUpdateService handles both deletion and upsert events for a service.
|
2022-08-22 10:22:11 -04:00
|
|
|
//
|
|
|
|
// On an UPSERT event:
|
|
|
|
// - All nodes, services, checks in the input pbNodes are re-applied through Raft.
|
|
|
|
// - Any nodes, services, or checks in the catalog that were not in the input pbNodes get deleted.
|
2022-06-13 11:52:28 -06:00
|
|
|
//
|
|
|
|
// On a DELETE event:
|
|
|
|
// - A reconciliation against nil or empty input pbNodes leads to deleting all stored catalog resources
|
|
|
|
// associated with the service name.
|
2022-07-08 12:01:13 -05:00
|
|
|
func (s *Server) handleUpdateService(
|
2022-05-19 16:37:52 -05:00
|
|
|
peerName string,
|
|
|
|
partition string,
|
|
|
|
sn structs.ServiceName,
|
2022-07-15 15:03:40 -05:00
|
|
|
export *pbpeerstream.ExportedService,
|
2022-05-19 16:37:52 -05:00
|
|
|
) error {
|
2022-06-13 11:52:28 -06:00
|
|
|
// Capture instances in the state store for reconciliation later.
|
2022-07-08 12:01:13 -05:00
|
|
|
_, storedInstances, err := s.GetStore().CheckServiceNodes(nil, sn.Name, &sn.EnterpriseMeta, peerName)
|
2022-06-13 11:52:28 -06:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to read imported services: %w", err)
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
|
|
|
|
2022-10-26 11:50:34 -04:00
|
|
|
structsNodes := []structs.CheckServiceNode{}
|
|
|
|
if export != nil {
|
|
|
|
structsNodes, err = export.CheckServiceNodesToStruct()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to convert protobuf instances to structs: %w", err)
|
|
|
|
}
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
|
|
|
|
2022-05-19 16:37:52 -05:00
|
|
|
// Normalize the data into a convenient form for operation.
|
|
|
|
snap := newHealthSnapshot(structsNodes, partition, peerName)
|
2022-10-18 16:19:24 -04:00
|
|
|
storedNodesMap, storedSvcInstMap, storedChecksMap := buildStoredMap(storedInstances)
|
2022-05-19 14:21:29 -05:00
|
|
|
|
2022-05-19 16:37:52 -05:00
|
|
|
for _, nodeSnap := range snap.Nodes {
|
2022-10-18 16:19:24 -04:00
|
|
|
// First register the node - skip the unchanged ones
|
|
|
|
changed := true
|
2023-05-08 13:13:25 -05:00
|
|
|
if storedNode, ok := storedNodesMap[nodeSnap.Node.Node]; ok {
|
2022-10-18 16:19:24 -04:00
|
|
|
if storedNode.IsSame(nodeSnap.Node) {
|
|
|
|
changed = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-05-19 16:37:52 -05:00
|
|
|
req := nodeSnap.Node.ToRegisterRequest()
|
2022-10-18 16:19:24 -04:00
|
|
|
if changed {
|
|
|
|
if err := s.Backend.CatalogRegister(&req); err != nil {
|
|
|
|
return fmt.Errorf("failed to register node: %w", err)
|
|
|
|
}
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
|
|
|
|
2022-10-18 16:19:24 -04:00
|
|
|
// Then register all services on that node - skip the unchanged ones
|
2022-05-19 16:37:52 -05:00
|
|
|
for _, svcSnap := range nodeSnap.Services {
|
2022-10-18 16:19:24 -04:00
|
|
|
changed = true
|
2023-05-08 13:13:25 -05:00
|
|
|
if storedSvcInst, ok := storedSvcInstMap[makeNodeSvcInstID(nodeSnap.Node.Node, svcSnap.Service.ID)]; ok {
|
2022-10-18 16:19:24 -04:00
|
|
|
if storedSvcInst.IsSame(svcSnap.Service) {
|
|
|
|
changed = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if changed {
|
|
|
|
req.Service = svcSnap.Service
|
|
|
|
if err := s.Backend.CatalogRegister(&req); err != nil {
|
|
|
|
return fmt.Errorf("failed to register service: %w", err)
|
|
|
|
}
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
req.Service = nil
|
|
|
|
|
2022-10-18 16:19:24 -04:00
|
|
|
// Then register all checks on that node - skip the unchanged ones
|
2022-05-19 14:21:29 -05:00
|
|
|
var chks structs.HealthChecks
|
2022-05-19 16:37:52 -05:00
|
|
|
for _, svcSnap := range nodeSnap.Services {
|
|
|
|
for _, c := range svcSnap.Checks {
|
2022-10-18 16:19:24 -04:00
|
|
|
changed := true
|
2023-05-08 13:13:25 -05:00
|
|
|
if chk, ok := storedChecksMap[makeNodeCheckID(nodeSnap.Node.Node, svcSnap.Service.ID, c.CheckID)]; ok {
|
2022-10-18 16:19:24 -04:00
|
|
|
if chk.IsSame(c) {
|
|
|
|
changed = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if changed {
|
|
|
|
chks = append(chks, c)
|
|
|
|
}
|
2022-05-19 16:37:52 -05:00
|
|
|
}
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
|
|
|
|
2022-10-18 16:19:24 -04:00
|
|
|
if len(chks) > 0 {
|
|
|
|
req.Checks = chks
|
|
|
|
if err := s.Backend.CatalogRegister(&req); err != nil {
|
|
|
|
return fmt.Errorf("failed to register check: %w", err)
|
|
|
|
}
|
2022-05-19 14:21:29 -05:00
|
|
|
}
|
|
|
|
}
|
2022-05-19 16:37:52 -05:00
|
|
|
|
2022-06-13 11:52:28 -06:00
|
|
|
//
|
|
|
|
// Now that the data received has been stored in the state store, the rest of this
|
|
|
|
// function is responsible for cleaning up data in the catalog that wasn't in the snapshot.
|
|
|
|
//
|
|
|
|
|
|
|
|
// nodeCheckTuple uniquely identifies a node check in the catalog.
|
|
|
|
// The partition is not needed because we are only operating on one partition's catalog.
|
|
|
|
type nodeCheckTuple struct {
|
|
|
|
checkID types.CheckID
|
|
|
|
node string
|
|
|
|
}
|
|
|
|
|
|
|
|
var (
|
|
|
|
// unusedNodes tracks node names that were not present in the latest response.
|
|
|
|
// Missing nodes are not assumed to be deleted because there may be other service names
|
|
|
|
// registered on them.
|
|
|
|
// Inside we also track a map of node checks associated with the node.
|
|
|
|
unusedNodes = make(map[string]struct{})
|
|
|
|
|
|
|
|
// deletedNodeChecks tracks node checks that were not present in the latest response.
|
|
|
|
// A single node check will be attached to all service instances of a node, so this
|
|
|
|
// deduplication prevents issuing multiple deregistrations for a single check.
|
|
|
|
deletedNodeChecks = make(map[nodeCheckTuple]struct{})
|
|
|
|
)
|
|
|
|
for _, csn := range storedInstances {
|
2022-07-15 10:51:38 -04:00
|
|
|
if _, ok := snap.Nodes[csn.Node.Node]; !ok {
|
|
|
|
unusedNodes[csn.Node.Node] = struct{}{}
|
2022-06-13 11:52:28 -06:00
|
|
|
|
|
|
|
// Since the node is not in the snapshot we can know the associated service
|
|
|
|
// instance is not in the snapshot either, since a service instance can't
|
|
|
|
// exist without a node.
|
|
|
|
// This will also delete all service checks.
|
2022-07-08 12:01:13 -05:00
|
|
|
err := s.Backend.CatalogDeregister(&structs.DeregisterRequest{
|
2022-06-13 11:52:28 -06:00
|
|
|
Node: csn.Node.Node,
|
|
|
|
ServiceID: csn.Service.ID,
|
|
|
|
EnterpriseMeta: csn.Service.EnterpriseMeta,
|
|
|
|
PeerName: peerName,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to deregister service %q: %w", csn.Service.CompoundServiceID(), err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// We can't know if a node check was deleted from the exporting cluster
|
|
|
|
// (but not the node itself) if the node wasn't in the snapshot,
|
|
|
|
// so we do not loop over checks here.
|
|
|
|
// If the unusedNode gets deleted below that will also delete node checks.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete the service instance if not in the snapshot.
|
|
|
|
sid := csn.Service.CompoundServiceID()
|
2022-07-15 10:51:38 -04:00
|
|
|
if _, ok := snap.Nodes[csn.Node.Node].Services[sid]; !ok {
|
2022-07-08 12:01:13 -05:00
|
|
|
err := s.Backend.CatalogDeregister(&structs.DeregisterRequest{
|
2022-06-13 11:52:28 -06:00
|
|
|
Node: csn.Node.Node,
|
|
|
|
ServiceID: csn.Service.ID,
|
|
|
|
EnterpriseMeta: csn.Service.EnterpriseMeta,
|
|
|
|
PeerName: peerName,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
ident := fmt.Sprintf("partition:%s/peer:%s/node:%s/ns:%s/service_id:%s",
|
|
|
|
csn.Service.PartitionOrDefault(), peerName, csn.Node.Node, csn.Service.NamespaceOrDefault(), csn.Service.ID)
|
|
|
|
return fmt.Errorf("failed to deregister service %q: %w", ident, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// When a service is deleted all associated checks also get deleted as a side effect.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reconcile checks.
|
|
|
|
for _, chk := range csn.Checks {
|
2022-07-15 10:51:38 -04:00
|
|
|
if _, ok := snap.Nodes[csn.Node.Node].Services[sid].Checks[chk.CheckID]; !ok {
|
2022-06-13 11:52:28 -06:00
|
|
|
// Checks without a ServiceID are node checks.
|
|
|
|
// If the node exists but the check does not then the check was deleted.
|
|
|
|
if chk.ServiceID == "" {
|
|
|
|
// Deduplicate node checks to avoid deregistering a check multiple times.
|
|
|
|
tuple := nodeCheckTuple{
|
|
|
|
checkID: chk.CheckID,
|
|
|
|
node: chk.Node,
|
|
|
|
}
|
|
|
|
deletedNodeChecks[tuple] = struct{}{}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the check isn't a node check then it's a service check.
|
|
|
|
// Service checks that were not present can be deleted immediately because
|
|
|
|
// checks for a given service ID will only be attached to a single CheckServiceNode.
|
2022-07-08 12:01:13 -05:00
|
|
|
err := s.Backend.CatalogDeregister(&structs.DeregisterRequest{
|
2022-06-13 11:52:28 -06:00
|
|
|
Node: chk.Node,
|
|
|
|
CheckID: chk.CheckID,
|
|
|
|
EnterpriseMeta: chk.EnterpriseMeta,
|
|
|
|
PeerName: peerName,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
ident := fmt.Sprintf("partition:%s/peer:%s/node:%s/ns:%s/check_id:%s",
|
|
|
|
chk.PartitionOrDefault(), peerName, chk.Node, chk.NamespaceOrDefault(), chk.CheckID)
|
|
|
|
return fmt.Errorf("failed to deregister check %q: %w", ident, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-05-19 16:37:52 -05:00
|
|
|
|
2022-06-13 11:52:28 -06:00
|
|
|
// Delete all deduplicated node checks.
|
|
|
|
for chk := range deletedNodeChecks {
|
|
|
|
nodeMeta := structs.NodeEnterpriseMetaInPartition(sn.PartitionOrDefault())
|
2022-07-08 12:01:13 -05:00
|
|
|
err := s.Backend.CatalogDeregister(&structs.DeregisterRequest{
|
2022-06-13 11:52:28 -06:00
|
|
|
Node: chk.node,
|
|
|
|
CheckID: chk.checkID,
|
|
|
|
EnterpriseMeta: *nodeMeta,
|
|
|
|
PeerName: peerName,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
ident := fmt.Sprintf("partition:%s/peer:%s/node:%s/check_id:%s", nodeMeta.PartitionOrDefault(), peerName, chk.node, chk.checkID)
|
|
|
|
return fmt.Errorf("failed to deregister node check %q: %w", ident, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete any nodes that do not have any other services registered on them.
|
|
|
|
for node := range unusedNodes {
|
2023-05-08 13:13:25 -05:00
|
|
|
// The wildcard is used here so that all services, regardless of namespace are returned
|
|
|
|
// by the following query. Without this, the node might accidentally be cleaned up early.
|
|
|
|
wildcardNSMeta := acl.NewEnterpriseMetaWithPartition(sn.PartitionOrDefault(), acl.WildcardName)
|
|
|
|
_, ns, err := s.GetStore().NodeServiceList(nil, node, &wildcardNSMeta, peerName)
|
2022-06-13 11:52:28 -06:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to query services on node: %w", err)
|
|
|
|
}
|
|
|
|
if ns != nil && len(ns.Services) >= 1 {
|
|
|
|
// At least one service is still registered on this node, so we keep it.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// All services on the node were deleted, so the node is also cleaned up.
|
2022-07-08 12:01:13 -05:00
|
|
|
err = s.Backend.CatalogDeregister(&structs.DeregisterRequest{
|
2022-06-13 11:52:28 -06:00
|
|
|
Node: node,
|
|
|
|
PeerName: peerName,
|
2023-05-08 13:13:25 -05:00
|
|
|
EnterpriseMeta: *structs.NodeEnterpriseMetaInPartition(sn.PartitionOrDefault()),
|
2022-06-13 11:52:28 -06:00
|
|
|
})
|
|
|
|
if err != nil {
|
2023-05-08 13:13:25 -05:00
|
|
|
ident := fmt.Sprintf("partition:%s/peer:%s/node:%s", sn.PartitionOrDefault(), peerName, node)
|
2022-06-13 11:52:28 -06:00
|
|
|
return fmt.Errorf("failed to deregister node %q: %w", ident, err)
|
|
|
|
}
|
|
|
|
}
|
2022-05-19 14:21:29 -05:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-07-08 12:01:13 -05:00
|
|
|
func (s *Server) handleUpsertRoots(
|
2022-05-26 15:24:09 -04:00
|
|
|
peerName string,
|
|
|
|
partition string,
|
|
|
|
trustBundle *pbpeering.PeeringTrustBundle,
|
|
|
|
) error {
|
|
|
|
// We override the partition and peer name so that the trust bundle gets stored
|
|
|
|
// in the importing partition with a reference to the peer it was imported from.
|
|
|
|
trustBundle.Partition = partition
|
|
|
|
trustBundle.PeerName = peerName
|
|
|
|
req := &pbpeering.PeeringTrustBundleWriteRequest{
|
|
|
|
PeeringTrustBundle: trustBundle,
|
|
|
|
}
|
2022-07-08 12:01:13 -05:00
|
|
|
return s.Backend.PeeringTrustBundleWrite(req)
|
2022-05-26 15:24:09 -04:00
|
|
|
}
|
|
|
|
|
2022-08-22 10:22:11 -04:00
|
|
|
func (s *Server) handleUpsertServerAddrs(
|
|
|
|
peerName string,
|
|
|
|
partition string,
|
|
|
|
addrs *pbpeering.PeeringServerAddresses,
|
|
|
|
) error {
|
|
|
|
q := state.Query{
|
|
|
|
Value: peerName,
|
|
|
|
EnterpriseMeta: *structs.DefaultEnterpriseMetaInPartition(partition),
|
|
|
|
}
|
|
|
|
_, existing, err := s.GetStore().PeeringRead(nil, q)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to read peering: %w", err)
|
|
|
|
}
|
|
|
|
if existing == nil || !existing.IsActive() {
|
|
|
|
return fmt.Errorf("peering does not exist or has been marked for deletion")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Clone to avoid mutating the existing data
|
|
|
|
p := proto.Clone(existing).(*pbpeering.Peering)
|
|
|
|
p.PeerServerAddresses = addrs.GetAddresses()
|
|
|
|
|
|
|
|
req := &pbpeering.PeeringWriteRequest{
|
|
|
|
Peering: p,
|
|
|
|
}
|
|
|
|
return s.Backend.PeeringWrite(req)
|
|
|
|
}
|
|
|
|
|
2022-07-13 10:00:35 -05:00
|
|
|
func makeACKReply(resourceURL, nonce string) *pbpeerstream.ReplicationMessage {
|
|
|
|
return makeReplicationRequest(&pbpeerstream.ReplicationMessage_Request{
|
|
|
|
ResourceURL: resourceURL,
|
|
|
|
ResponseNonce: nonce,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func makeNACKReply(resourceURL, nonce string, errCode code.Code, errMsg string) *pbpeerstream.ReplicationMessage {
|
2022-05-19 14:21:29 -05:00
|
|
|
var rpcErr *pbstatus.Status
|
|
|
|
if errCode != code.Code_OK || errMsg != "" {
|
|
|
|
rpcErr = &pbstatus.Status{
|
|
|
|
Code: int32(errCode),
|
|
|
|
Message: errMsg,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-07-13 10:00:35 -05:00
|
|
|
return makeReplicationRequest(&pbpeerstream.ReplicationMessage_Request{
|
|
|
|
ResourceURL: resourceURL,
|
|
|
|
ResponseNonce: nonce,
|
|
|
|
Error: rpcErr,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// makeReplicationRequest is a convenience method to make a Request-type ReplicationMessage.
|
|
|
|
func makeReplicationRequest(req *pbpeerstream.ReplicationMessage_Request) *pbpeerstream.ReplicationMessage {
|
2022-07-08 12:01:13 -05:00
|
|
|
return &pbpeerstream.ReplicationMessage{
|
|
|
|
Payload: &pbpeerstream.ReplicationMessage_Request_{
|
2022-07-13 10:00:35 -05:00
|
|
|
Request: req,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// makeReplicationResponse is a convenience method to make a Response-type ReplicationMessage.
|
|
|
|
func makeReplicationResponse(resp *pbpeerstream.ReplicationMessage_Response) *pbpeerstream.ReplicationMessage {
|
|
|
|
return &pbpeerstream.ReplicationMessage{
|
|
|
|
Payload: &pbpeerstream.ReplicationMessage_Response_{
|
|
|
|
Response: resp,
|
2022-05-19 14:21:29 -05:00
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
2022-10-18 16:19:24 -04:00
|
|
|
|
|
|
|
// nodeSvcInstIdentity uniquely identifies a service instance imported from a
// peering cluster. It is comparable, so it can be used as a map key.
type nodeSvcInstIdentity struct {
	// nodeID and serviceID together form the key.
	nodeID, serviceID string
}
|
|
|
|
|
|
|
|
// nodeCheckIdentity uniquely identifies a check imported from a peering cluster.
// It is comparable, so it can be used as a map key.
type nodeCheckIdentity struct {
	// nodeID, serviceID and checkID together form the key.
	nodeID, serviceID, checkID string
}
|
|
|
|
|
2023-05-08 13:13:25 -05:00
|
|
|
func makeNodeSvcInstID(node string, serviceID string) nodeSvcInstIdentity {
|
2022-10-18 16:19:24 -04:00
|
|
|
return nodeSvcInstIdentity{
|
2023-05-08 13:13:25 -05:00
|
|
|
nodeID: node,
|
2022-10-18 16:19:24 -04:00
|
|
|
serviceID: serviceID,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-08 13:13:25 -05:00
|
|
|
func makeNodeCheckID(node string, serviceID string, checkID types.CheckID) nodeCheckIdentity {
|
2022-10-18 16:19:24 -04:00
|
|
|
return nodeCheckIdentity{
|
|
|
|
serviceID: serviceID,
|
|
|
|
checkID: string(checkID),
|
2023-05-08 13:13:25 -05:00
|
|
|
nodeID: node,
|
2022-10-18 16:19:24 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-08 13:13:25 -05:00
|
|
|
func buildStoredMap(storedInstances structs.CheckServiceNodes) (
|
|
|
|
map[string]*structs.Node,
|
|
|
|
map[nodeSvcInstIdentity]*structs.NodeService,
|
|
|
|
map[nodeCheckIdentity]*structs.HealthCheck,
|
|
|
|
) {
|
|
|
|
nodesMap := map[string]*structs.Node{}
|
2022-10-18 16:19:24 -04:00
|
|
|
svcInstMap := map[nodeSvcInstIdentity]*structs.NodeService{}
|
|
|
|
checksMap := map[nodeCheckIdentity]*structs.HealthCheck{}
|
|
|
|
|
|
|
|
for _, csn := range storedInstances {
|
2023-05-08 13:13:25 -05:00
|
|
|
nodesMap[csn.Node.Node] = csn.Node
|
|
|
|
svcInstMap[makeNodeSvcInstID(csn.Node.Node, csn.Service.ID)] = csn.Service
|
2022-10-18 16:19:24 -04:00
|
|
|
for _, chk := range csn.Checks {
|
2023-05-08 13:13:25 -05:00
|
|
|
checksMap[makeNodeCheckID(csn.Node.Node, csn.Service.ID, chk.CheckID)] = chk
|
2022-10-18 16:19:24 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return nodesMap, svcInstMap, checksMap
|
|
|
|
}
|