mirror of https://github.com/status-im/consul.git
feat(v2): add consul service and workloads to catalog (#20077)
parent 8e2d4e3aaf
commit 073959866d
@@ -8,7 +8,6 @@ import (
 	"errors"
 	"fmt"
 	"net"
-	"reflect"
 	"strconv"
 	"strings"
 	"sync"
@@ -17,6 +16,7 @@ import (
 	"github.com/armon/go-metrics"
 	"github.com/armon/go-metrics/prometheus"
+	"github.com/google/go-cmp/cmp"
 	"github.com/oklog/ulid/v2"
 	"golang.org/x/time/rate"
 	"google.golang.org/protobuf/types/known/anypb"

@@ -36,9 +36,9 @@ import (
 	"github.com/hashicorp/consul/internal/storage"
 	"github.com/hashicorp/consul/lib"
 	"github.com/hashicorp/consul/logging"
+	pbcatalog "github.com/hashicorp/consul/proto-public/pbcatalog/v2beta1"
 	"github.com/hashicorp/consul/proto-public/pbresource"
 	pbtenancy "github.com/hashicorp/consul/proto-public/pbtenancy/v2beta1"
-	"github.com/hashicorp/consul/types"
 )

 var LeaderSummaries = []prometheus.SummaryDefinition{
@@ -355,6 +355,12 @@ func (s *Server) establishLeadership(ctx context.Context) error {
 		}
 	}

+	if s.useV2Resources {
+		if err := s.initConsulService(ctx, s.insecureResourceServiceClient); err != nil {
+			return err
+		}
+	}
+
 	if s.config.Reporting.License.Enabled && s.reportingManager != nil {
 		s.reportingManager.StartReportingAgent()
 	}
@@ -958,13 +964,21 @@ func (s *Server) reconcileReaped(known map[string]struct{}, nodeEntMeta *acl.Ent
 		}

 		// Attempt to reap this member
-		if err := s.handleReapMember(member, nodeEntMeta); err != nil {
+		if err := s.registrator.HandleReapMember(member, nodeEntMeta, s.removeConsulServer); err != nil {
 			return err
 		}
 	}
 	return nil
 }

+// ConsulRegistrator is an interface that manages the catalog registration lifecycle of Consul servers from serf events.
+type ConsulRegistrator interface {
+	HandleAliveMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta, joinServer func(m serf.Member, parts *metadata.Server) error) error
+	HandleFailedMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta) error
+	HandleLeftMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta, removeServerFunc func(m serf.Member) error) error
+	HandleReapMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta, removeServerFunc func(m serf.Member) error) error
+}
+
 // reconcileMember is used to do an async reconcile of a single
 // serf member
 func (s *Server) reconcileMember(member serf.Member) error {
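The new ConsulRegistrator interface is the seam this commit introduces: reconcileMember and reconcileReaped now dispatch serf membership events through s.registrator instead of calling Server methods directly, so the catalog backend behind those events can vary. As a rough sketch of what the seam enables (this type is hypothetical and not part of the commit), a recording stub satisfies the interface for unit tests:

// recordingRegistrator is a hypothetical test double: it records which serf
// members were handled instead of writing anything to the catalog.
type recordingRegistrator struct {
	alive, failed, left, reaped []string
}

func (r *recordingRegistrator) HandleAliveMember(m serf.Member, _ *acl.EnterpriseMeta, _ func(serf.Member, *metadata.Server) error) error {
	r.alive = append(r.alive, m.Name)
	return nil
}

func (r *recordingRegistrator) HandleFailedMember(m serf.Member, _ *acl.EnterpriseMeta) error {
	r.failed = append(r.failed, m.Name)
	return nil
}

func (r *recordingRegistrator) HandleLeftMember(m serf.Member, _ *acl.EnterpriseMeta, _ func(serf.Member) error) error {
	r.left = append(r.left, m.Name)
	return nil
}

func (r *recordingRegistrator) HandleReapMember(m serf.Member, _ *acl.EnterpriseMeta, _ func(serf.Member) error) error {
	r.reaped = append(r.reaped, m.Name)
	return nil
}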
@@ -983,13 +997,13 @@ func (s *Server) reconcileMember(member serf.Member) error {
 	var err error
 	switch member.Status {
 	case serf.StatusAlive:
-		err = s.handleAliveMember(member, nodeEntMeta)
+		err = s.registrator.HandleAliveMember(member, nodeEntMeta, s.joinConsulServer)
 	case serf.StatusFailed:
-		err = s.handleFailedMember(member, nodeEntMeta)
+		err = s.registrator.HandleFailedMember(member, nodeEntMeta)
 	case serf.StatusLeft:
-		err = s.handleLeftMember(member, nodeEntMeta)
+		err = s.registrator.HandleLeftMember(member, nodeEntMeta, s.removeConsulServer)
 	case StatusReap:
-		err = s.handleReapMember(member, nodeEntMeta)
+		err = s.registrator.HandleReapMember(member, nodeEntMeta, s.removeConsulServer)
 	}
 	if err != nil {
 		s.logger.Error("failed to reconcile member",
@@ -1020,254 +1034,6 @@ func (s *Server) shouldHandleMember(member serf.Member) bool {
 	return false
 }

-// handleAliveMember is used to ensure the node
-// is registered, with a passing health check.
-func (s *Server) handleAliveMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta) error {
-	if nodeEntMeta == nil {
-		nodeEntMeta = structs.NodeEnterpriseMetaInDefaultPartition()
-	}
-
-	// Register consul service if a server
-	var service *structs.NodeService
-	if valid, parts := metadata.IsConsulServer(member); valid {
-		service = &structs.NodeService{
-			ID:      structs.ConsulServiceID,
-			Service: structs.ConsulServiceName,
-			Port:    parts.Port,
-			Weights: &structs.Weights{
-				Passing: 1,
-				Warning: 1,
-			},
-			EnterpriseMeta: *nodeEntMeta,
-			Meta: map[string]string{
-				// DEPRECATED - remove nonvoter in favor of read_replica in a future version of consul
-				"non_voter":             strconv.FormatBool(member.Tags["nonvoter"] == "1"),
-				"read_replica":          strconv.FormatBool(member.Tags["read_replica"] == "1"),
-				"raft_version":          strconv.Itoa(parts.RaftVersion),
-				"serf_protocol_current": strconv.FormatUint(uint64(member.ProtocolCur), 10),
-				"serf_protocol_min":     strconv.FormatUint(uint64(member.ProtocolMin), 10),
-				"serf_protocol_max":     strconv.FormatUint(uint64(member.ProtocolMax), 10),
-				"version":               parts.Build.String(),
-			},
-		}
-
-		if parts.ExternalGRPCPort > 0 {
-			service.Meta["grpc_port"] = strconv.Itoa(parts.ExternalGRPCPort)
-		}
-		if parts.ExternalGRPCTLSPort > 0 {
-			service.Meta["grpc_tls_port"] = strconv.Itoa(parts.ExternalGRPCTLSPort)
-		}
-
-		// Attempt to join the consul server
-		if err := s.joinConsulServer(member, parts); err != nil {
-			return err
-		}
-	}
-
-	// Check if the node exists
-	state := s.fsm.State()
-	_, node, err := state.GetNode(member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
-	if err != nil {
-		return err
-	}
-	if node != nil && node.Address == member.Addr.String() {
-		// Check if the associated service is available
-		if service != nil {
-			match := false
-			_, services, err := state.NodeServices(nil, member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
-			if err != nil {
-				return err
-			}
-			if services != nil {
-				for id, serv := range services.Services {
-					if id == service.ID {
-						// If metadata are different, be sure to update it
-						match = reflect.DeepEqual(serv.Meta, service.Meta)
-					}
-				}
-			}
-			if !match {
-				goto AFTER_CHECK
-			}
-		}
-
-		// Check if the serfCheck is in the passing state
-		_, checks, err := state.NodeChecks(nil, member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
-		if err != nil {
-			return err
-		}
-		for _, check := range checks {
-			if check.CheckID == structs.SerfCheckID && check.Status == api.HealthPassing {
-				return nil
-			}
-		}
-	}
-AFTER_CHECK:
-	s.logger.Info("member joined, marking health alive",
-		"member", member.Name,
-		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
-	)
-
-	// Get consul version from serf member
-	// add this as node meta in catalog register request
-	buildVersion, err := metadata.Build(&member)
-	if err != nil {
-		return err
-	}
-
-	// Register with the catalog.
-	req := structs.RegisterRequest{
-		Datacenter: s.config.Datacenter,
-		Node:       member.Name,
-		ID:         types.NodeID(member.Tags["id"]),
-		Address:    member.Addr.String(),
-		Service:    service,
-		Check: &structs.HealthCheck{
-			Node:    member.Name,
-			CheckID: structs.SerfCheckID,
-			Name:    structs.SerfCheckName,
-			Status:  api.HealthPassing,
-			Output:  structs.SerfCheckAliveOutput,
-		},
-		EnterpriseMeta: *nodeEntMeta,
-		NodeMeta: map[string]string{
-			structs.MetaConsulVersion: buildVersion.String(),
-		},
-	}
-	if node != nil {
-		req.TaggedAddresses = node.TaggedAddresses
-		req.NodeMeta = node.Meta
-	}
-
-	_, err = s.raftApply(structs.RegisterRequestType, &req)
-	return err
-}
-
-// handleFailedMember is used to mark the node's status
-// as being critical, along with all checks as unknown.
-func (s *Server) handleFailedMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta) error {
-	if nodeEntMeta == nil {
-		nodeEntMeta = structs.NodeEnterpriseMetaInDefaultPartition()
-	}
-
-	// Check if the node exists
-	state := s.fsm.State()
-	_, node, err := state.GetNode(member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
-	if err != nil {
-		return err
-	}
-
-	if node == nil {
-		s.logger.Info("ignoring failed event for member because it does not exist in the catalog",
-			"member", member.Name,
-			"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
-		)
-		return nil
-	}
-
-	if node.Address == member.Addr.String() {
-		// Check if the serfCheck is in the critical state
-		_, checks, err := state.NodeChecks(nil, member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
-		if err != nil {
-			return err
-		}
-		for _, check := range checks {
-			if check.CheckID == structs.SerfCheckID && check.Status == api.HealthCritical {
-				return nil
-			}
-		}
-	}
-	s.logger.Info("member failed, marking health critical",
-		"member", member.Name,
-		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
-	)
-
-	// Register with the catalog
-	req := structs.RegisterRequest{
-		Datacenter:     s.config.Datacenter,
-		Node:           member.Name,
-		EnterpriseMeta: *nodeEntMeta,
-		ID:             types.NodeID(member.Tags["id"]),
-		Address:        member.Addr.String(),
-		Check: &structs.HealthCheck{
-			Node:    member.Name,
-			CheckID: structs.SerfCheckID,
-			Name:    structs.SerfCheckName,
-			Status:  api.HealthCritical,
-			Output:  structs.SerfCheckFailedOutput,
-		},
-
-		// If there's existing information about the node, do not
-		// clobber it.
-		SkipNodeUpdate: true,
-	}
-	_, err = s.raftApply(structs.RegisterRequestType, &req)
-	return err
-}
-
-// handleLeftMember is used to handle members that gracefully
-// left. They are deregistered if necessary.
-func (s *Server) handleLeftMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta) error {
-	return s.handleDeregisterMember("left", member, nodeEntMeta)
-}
-
-// handleReapMember is used to handle members that have been
-// reaped after a prolonged failure. They are deregistered.
-func (s *Server) handleReapMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta) error {
-	return s.handleDeregisterMember("reaped", member, nodeEntMeta)
-}
-
-// handleDeregisterMember is used to deregister a member of a given reason
-func (s *Server) handleDeregisterMember(reason string, member serf.Member, nodeEntMeta *acl.EnterpriseMeta) error {
-	if nodeEntMeta == nil {
-		nodeEntMeta = structs.NodeEnterpriseMetaInDefaultPartition()
-	}
-
-	// Do not deregister ourself. This can only happen if the current leader
-	// is leaving. Instead, we should allow a follower to take-over and
-	// deregister us later.
-	//
-	// TODO(partitions): check partitions here too? server names should be unique in general though
-	if strings.EqualFold(member.Name, s.config.NodeName) {
-		s.logger.Warn("deregistering self should be done by follower",
-			"name", s.config.NodeName,
-			"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
-		)
-		return nil
-	}
-
-	// Remove from Raft peers if this was a server
-	if valid, _ := metadata.IsConsulServer(member); valid {
-		if err := s.removeConsulServer(member); err != nil {
-			return err
-		}
-	}
-
-	// Check if the node does not exist
-	state := s.fsm.State()
-	_, node, err := state.GetNode(member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
-	if err != nil {
-		return err
-	}
-	if node == nil {
-		return nil
-	}
-
-	// Deregister the node
-	s.logger.Info("deregistering member",
-		"member", member.Name,
-		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
-		"reason", reason,
-	)
-	req := structs.DeregisterRequest{
-		Datacenter:     s.config.Datacenter,
-		Node:           member.Name,
-		EnterpriseMeta: *nodeEntMeta,
-	}
-	_, err = s.raftApply(structs.DeregisterRequestType, &req)
-	return err
-}
-
 // joinConsulServer is used to try to join another consul server
 func (s *Server) joinConsulServer(m serf.Member, parts *metadata.Server) error {
 	// Check for possibility of multiple bootstrap nodes
@@ -1464,6 +1230,66 @@ func (s *serversIntentionsAsConfigEntriesInfo) update(srv *metadata.Server) bool
 	return false
 }

+func (s *Server) initConsulService(ctx context.Context, client pbresource.ResourceServiceClient) error {
+	service := &pbcatalog.Service{
+		Workloads: &pbcatalog.WorkloadSelector{
+			Prefixes: []string{consulWorkloadPrefix},
+		},
+		Ports: []*pbcatalog.ServicePort{
+			{
+				TargetPort: consulPortNameServer,
+				Protocol:   pbcatalog.Protocol_PROTOCOL_TCP,
+				// No virtual port defined for now, as we assume this is generally for Service Discovery
+			},
+		},
+	}
+
+	serviceData, err := anypb.New(service)
+	if err != nil {
+		return fmt.Errorf("could not convert Service to `any` message: %w", err)
+	}
+
+	// create a default namespace in default partition
+	serviceID := &pbresource.ID{
+		Type:    pbcatalog.ServiceType,
+		Name:    structs.ConsulServiceName,
+		Tenancy: resource.DefaultNamespacedTenancy(),
+	}
+
+	serviceResource := &pbresource.Resource{
+		Id:   serviceID,
+		Data: serviceData,
+	}
+
+	res, err := client.Read(ctx, &pbresource.ReadRequest{Id: serviceID})
+	if err != nil && !grpcNotFoundErr(err) {
+		return fmt.Errorf("failed to read the %s Service: %w", structs.ConsulServiceName, err)
+	}
+
+	if err == nil {
+		existingService := res.GetResource()
+		s.logger.Debug("existingService consul Service found")
+
+		// If the Service is identical, we're done.
+		if cmp.Equal(serviceResource, existingService, resourceCmpOptions...) {
+			s.logger.Debug("no updates to perform on consul Service")
+			return nil
+		}
+
+		// If the existing Service is different, add the Version to the patch for CAS write.
+		serviceResource.Id = existingService.Id
+		serviceResource.Version = existingService.Version
+	}
+
+	_, err = client.Write(ctx, &pbresource.WriteRequest{Resource: serviceResource})
+	if err != nil {
+		return fmt.Errorf("failed to create the %s service: %w", structs.ConsulServiceName, err)
+	}
+
+	s.logger.Info("Created consul Service in catalog")
+	return nil
+}
+
 func (s *Server) initTenancy(ctx context.Context, b storage.Backend) error {
 	// we write these defaults directly to the storage backend
 	// without going through the resource service since tenancy
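initConsulService treats a NotFound read as "create" and otherwise reuses the existing resource's Id and Version so the Write behaves like a compare-and-swap. The grpcNotFoundErr helper is not visible in this hunk (judging by the grpc codes/status imports in the last file below, it lives alongside the v2 code); a typical implementation, sketched here as an assumption rather than the commit's actual code, inspects the gRPC status code:

// grpcNotFoundErr reports whether err carries a gRPC NotFound status.
// Sketch only; the real helper's definition is outside this excerpt.
func grpcNotFoundErr(err error) bool {
	s, ok := status.FromError(err)
	return ok && s.Code() == codes.NotFound
}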
@@ -0,0 +1,279 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+package consul
+
+import (
+	"reflect"
+	"strconv"
+	"strings"
+
+	"github.com/hashicorp/go-hclog"
+	"github.com/hashicorp/serf/serf"
+
+	"github.com/hashicorp/consul/acl"
+	"github.com/hashicorp/consul/agent/consul/fsm"
+	"github.com/hashicorp/consul/agent/metadata"
+	"github.com/hashicorp/consul/agent/structs"
+	"github.com/hashicorp/consul/api"
+	"github.com/hashicorp/consul/types"
+)
+
+var _ ConsulRegistrator = (*V1ConsulRegistrator)(nil)
+
+type V1ConsulRegistrator struct {
+	Datacenter string
+	FSM        *fsm.FSM
+	Logger     hclog.Logger
+	NodeName   string
+
+	RaftApplyFunc func(t structs.MessageType, msg any) (any, error)
+}
+
+// HandleAliveMember is used to ensure the node
+// is registered, with a passing health check.
+func (r V1ConsulRegistrator) HandleAliveMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta, joinServer func(m serf.Member, parts *metadata.Server) error) error {
+	if nodeEntMeta == nil {
+		nodeEntMeta = structs.NodeEnterpriseMetaInDefaultPartition()
+	}
+
+	// Register consul service if a server
+	var service *structs.NodeService
+	if valid, parts := metadata.IsConsulServer(member); valid {
+		service = &structs.NodeService{
+			ID:      structs.ConsulServiceID,
+			Service: structs.ConsulServiceName,
+			Port:    parts.Port,
+			Weights: &structs.Weights{
+				Passing: 1,
+				Warning: 1,
+			},
+			EnterpriseMeta: *nodeEntMeta,
+			Meta: map[string]string{
+				// DEPRECATED - remove nonvoter in favor of read_replica in a future version of consul
+				"non_voter":             strconv.FormatBool(member.Tags["nonvoter"] == "1"),
+				"read_replica":          strconv.FormatBool(member.Tags["read_replica"] == "1"),
+				"raft_version":          strconv.Itoa(parts.RaftVersion),
+				"serf_protocol_current": strconv.FormatUint(uint64(member.ProtocolCur), 10),
+				"serf_protocol_min":     strconv.FormatUint(uint64(member.ProtocolMin), 10),
+				"serf_protocol_max":     strconv.FormatUint(uint64(member.ProtocolMax), 10),
+				"version":               parts.Build.String(),
+			},
+		}
+
+		if parts.ExternalGRPCPort > 0 {
+			service.Meta["grpc_port"] = strconv.Itoa(parts.ExternalGRPCPort)
+		}
+		if parts.ExternalGRPCTLSPort > 0 {
+			service.Meta["grpc_tls_port"] = strconv.Itoa(parts.ExternalGRPCTLSPort)
+		}
+
+		// Attempt to join the consul server
+		if err := joinServer(member, parts); err != nil {
+			return err
+		}
+	}
+
+	// Check if the node exists
+	state := r.FSM.State()
+	_, node, err := state.GetNode(member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
+	if err != nil {
+		return err
+	}
+	if node != nil && node.Address == member.Addr.String() {
+		// Check if the associated service is available
+		if service != nil {
+			match := false
+			_, services, err := state.NodeServices(nil, member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
+			if err != nil {
+				return err
+			}
+			if services != nil {
+				for id, serv := range services.Services {
+					if id == service.ID {
+						// If metadata are different, be sure to update it
+						match = reflect.DeepEqual(serv.Meta, service.Meta)
+					}
+				}
+			}
+			if !match {
+				goto AFTER_CHECK
+			}
+		}
+
+		// Check if the serfCheck is in the passing state
+		_, checks, err := state.NodeChecks(nil, member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
+		if err != nil {
+			return err
+		}
+		for _, check := range checks {
+			if check.CheckID == structs.SerfCheckID && check.Status == api.HealthPassing {
+				return nil
+			}
+		}
+	}
+AFTER_CHECK:
+	r.Logger.Info("member joined, marking health alive",
+		"member", member.Name,
+		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
+	)
+
+	// Get consul version from serf member
+	// add this as node meta in catalog register request
+	buildVersion, err := metadata.Build(&member)
+	if err != nil {
+		return err
+	}
+
+	// Register with the catalog.
+	req := structs.RegisterRequest{
+		Datacenter: r.Datacenter,
+		Node:       member.Name,
+		ID:         types.NodeID(member.Tags["id"]),
+		Address:    member.Addr.String(),
+		Service:    service,
+		Check: &structs.HealthCheck{
+			Node:    member.Name,
+			CheckID: structs.SerfCheckID,
+			Name:    structs.SerfCheckName,
+			Status:  api.HealthPassing,
+			Output:  structs.SerfCheckAliveOutput,
+		},
+		EnterpriseMeta: *nodeEntMeta,
+		NodeMeta: map[string]string{
+			structs.MetaConsulVersion: buildVersion.String(),
+		},
+	}
+	if node != nil {
+		req.TaggedAddresses = node.TaggedAddresses
+		req.NodeMeta = node.Meta
+	}
+
+	_, err = r.RaftApplyFunc(structs.RegisterRequestType, &req)
+	return err
+}
+
+// HandleFailedMember is used to mark the node's status
+// as being critical, along with all checks as unknown.
+func (r V1ConsulRegistrator) HandleFailedMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta) error {
+	if nodeEntMeta == nil {
+		nodeEntMeta = structs.NodeEnterpriseMetaInDefaultPartition()
+	}
+
+	// Check if the node exists
+	state := r.FSM.State()
+	_, node, err := state.GetNode(member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
+	if err != nil {
+		return err
+	}
+
+	if node == nil {
+		r.Logger.Info("ignoring failed event for member because it does not exist in the catalog",
+			"member", member.Name,
+			"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
+		)
+		return nil
+	}
+
+	if node.Address == member.Addr.String() {
+		// Check if the serfCheck is in the critical state
+		_, checks, err := state.NodeChecks(nil, member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
+		if err != nil {
+			return err
+		}
+		for _, check := range checks {
+			if check.CheckID == structs.SerfCheckID && check.Status == api.HealthCritical {
+				return nil
+			}
+		}
+	}
+	r.Logger.Info("member failed, marking health critical",
+		"member", member.Name,
+		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
+	)
+
+	// Register with the catalog
+	req := structs.RegisterRequest{
+		Datacenter:     r.Datacenter,
+		Node:           member.Name,
+		EnterpriseMeta: *nodeEntMeta,
+		ID:             types.NodeID(member.Tags["id"]),
+		Address:        member.Addr.String(),
+		Check: &structs.HealthCheck{
+			Node:    member.Name,
+			CheckID: structs.SerfCheckID,
+			Name:    structs.SerfCheckName,
+			Status:  api.HealthCritical,
+			Output:  structs.SerfCheckFailedOutput,
+		},
+
+		// If there's existing information about the node, do not
+		// clobber it.
+		SkipNodeUpdate: true,
+	}
+	_, err = r.RaftApplyFunc(structs.RegisterRequestType, &req)
+	return err
+}
+
+// HandleLeftMember is used to handle members that gracefully
+// left. They are deregistered if necessary.
+func (r V1ConsulRegistrator) HandleLeftMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta, removeServerFunc func(m serf.Member) error) error {
+	return r.handleDeregisterMember("left", member, nodeEntMeta, removeServerFunc)
+}
+
+// HandleReapMember is used to handle members that have been
+// reaped after a prolonged failure. They are deregistered.
+func (r V1ConsulRegistrator) HandleReapMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta, removeServerFunc func(m serf.Member) error) error {
+	return r.handleDeregisterMember("reaped", member, nodeEntMeta, removeServerFunc)
+}
+
+// handleDeregisterMember is used to deregister a member of a given reason
+func (r V1ConsulRegistrator) handleDeregisterMember(reason string, member serf.Member, nodeEntMeta *acl.EnterpriseMeta, removeServerFunc func(m serf.Member) error) error {
+	if nodeEntMeta == nil {
+		nodeEntMeta = structs.NodeEnterpriseMetaInDefaultPartition()
+	}
+
+	// Do not deregister ourself. This can only happen if the current leader
+	// is leaving. Instead, we should allow a follower to take-over and
+	// deregister us later.
+	//
+	// TODO(partitions): check partitions here too? server names should be unique in general though
+	if strings.EqualFold(member.Name, r.NodeName) {
+		r.Logger.Warn("deregistering self should be done by follower",
+			"name", r.NodeName,
+			"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
+		)
+		return nil
+	}
+
+	// Remove from Raft peers if this was a server
+	if valid, _ := metadata.IsConsulServer(member); valid {
+		if err := removeServerFunc(member); err != nil {
+			return err
+		}
+	}
+
+	// Check if the node does not exist
+	state := r.FSM.State()
+	_, node, err := state.GetNode(member.Name, nodeEntMeta, structs.DefaultPeerKeyword)
+	if err != nil {
+		return err
+	}
+	if node == nil {
+		return nil
+	}
+
+	// Deregister the node
+	r.Logger.Info("deregistering member",
+		"member", member.Name,
+		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
+		"reason", reason,
+	)
+	req := structs.DeregisterRequest{
+		Datacenter:     r.Datacenter,
+		Node:           member.Name,
+		EnterpriseMeta: *nodeEntMeta,
+	}
+	_, err = r.RaftApplyFunc(structs.DeregisterRequestType, &req)
+	return err
+}
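V1ConsulRegistrator carries only the leader-loop dependencies the old Server methods reached for implicitly (FSM state, logger, datacenter, node name, and raft apply), which keeps it a plain value type with injectable collaborators. The wiring of the s.registrator field used in leader.go is not shown in these hunks; under that caveat, server construction presumably populates it along these lines (illustrative, not the commit's code):

// Hypothetical wiring during server setup, inferred from the call sites above.
s.registrator = V1ConsulRegistrator{
	Datacenter:    s.config.Datacenter,
	FSM:           s.fsm,
	Logger:        s.logger,
	NodeName:      s.config.NodeName,
	RaftApplyFunc: s.raftApply,
}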
@@ -0,0 +1,887 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+package consul
+
+import (
+	"context"
+	"os"
+	"strconv"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/hashicorp/serf/serf"
+	"github.com/stretchr/testify/require"
+
+	"github.com/hashicorp/consul/agent/structs"
+	"github.com/hashicorp/consul/api"
+	"github.com/hashicorp/consul/sdk/freeport"
+	"github.com/hashicorp/consul/sdk/testutil/retry"
+	"github.com/hashicorp/consul/testrpc"
+)
+
+func TestLeader_RegisterMember(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+	dir1, s1 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "deny"
+	})
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	dir2, c1 := testClient(t)
+	defer os.RemoveAll(dir2)
+	defer c1.Shutdown()
+
+	// Try to join
+	joinLAN(t, c1, s1)
+
+	testrpc.WaitForLeader(t, s1.RPC, "dc1")
+
+	// Client should be registered
+	state := s1.fsm.State()
+	retry.Run(t, func(r *retry.R) {
+		_, node, err := state.GetNode(c1.config.NodeName, nil, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if node == nil {
+			r.Fatal("client not registered")
+		}
+	})
+
+	// Should have a check
+	_, checks, err := state.NodeChecks(nil, c1.config.NodeName, nil, "")
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+	if len(checks) != 1 {
+		t.Fatalf("client missing check")
+	}
+	if checks[0].CheckID != structs.SerfCheckID {
+		t.Fatalf("bad check: %v", checks[0])
+	}
+	if checks[0].Name != structs.SerfCheckName {
+		t.Fatalf("bad check: %v", checks[0])
+	}
+	if checks[0].Status != api.HealthPassing {
+		t.Fatalf("bad check: %v", checks[0])
+	}
+
+	// Server should be registered
+	retry.Run(t, func(r *retry.R) {
+		_, node, err := state.GetNode(s1.config.NodeName, nil, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if node == nil {
+			r.Fatalf("server not registered")
+		}
+	})
+
+	// Service should be registered
+	_, services, err := state.NodeServices(nil, s1.config.NodeName, nil, "")
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+	if _, ok := services.Services["consul"]; !ok {
+		t.Fatalf("consul service not registered: %v", services)
+	}
+}
+
+func TestLeader_FailedMember(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+	dir1, s1 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "deny"
+	})
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	dir2, c1 := testClient(t)
+	defer os.RemoveAll(dir2)
+	defer c1.Shutdown()
+
+	testrpc.WaitForLeader(t, s1.RPC, "dc1")
+
+	// Try to join
+	joinLAN(t, c1, s1)
+
+	// Fail the member
+	c1.Shutdown()
+
+	// Should be registered
+	state := s1.fsm.State()
+	retry.Run(t, func(r *retry.R) {
+		_, node, err := state.GetNode(c1.config.NodeName, nil, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if node == nil {
+			r.Fatal("client not registered")
+		}
+	})
+
+	// Should have a check
+	_, checks, err := state.NodeChecks(nil, c1.config.NodeName, nil, "")
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+	if len(checks) != 1 {
+		t.Fatalf("client missing check")
+	}
+	if checks[0].CheckID != structs.SerfCheckID {
+		t.Fatalf("bad check: %v", checks[0])
+	}
+	if checks[0].Name != structs.SerfCheckName {
+		t.Fatalf("bad check: %v", checks[0])
+	}
+
+	retry.Run(t, func(r *retry.R) {
+		_, checks, err = state.NodeChecks(nil, c1.config.NodeName, nil, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if len(checks) != 1 {
+			r.Fatalf("client missing check")
+		}
+		if got, want := checks[0].Status, api.HealthCritical; got != want {
+			r.Fatalf("got status %q want %q", got, want)
+		}
+	})
+}
+
+func TestLeader_LeftMember(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+	dir1, s1 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "deny"
+	})
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	dir2, c1 := testClient(t)
+	defer os.RemoveAll(dir2)
+	defer c1.Shutdown()
+
+	// Try to join
+	joinLAN(t, c1, s1)
+
+	state := s1.fsm.State()
+
+	// Should be registered
+	retry.Run(t, func(r *retry.R) {
+		_, node, err := state.GetNode(c1.config.NodeName, nil, "")
+		require.NoError(r, err)
+		require.NotNil(r, node, "client not registered")
+	})
+
+	// Node should leave
+	c1.Leave()
+	c1.Shutdown()
+
+	// Should be deregistered
+	retry.Run(t, func(r *retry.R) {
+		_, node, err := state.GetNode(c1.config.NodeName, nil, "")
+		require.NoError(r, err)
+		require.Nil(r, node, "client still registered")
+	})
+}
+
+func TestLeader_ReapMember(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+	dir1, s1 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "deny"
+	})
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	dir2, c1 := testClient(t)
+	defer os.RemoveAll(dir2)
+	defer c1.Shutdown()
+
+	// Try to join
+	joinLAN(t, c1, s1)
+
+	state := s1.fsm.State()
+
+	// Should be registered
+	retry.Run(t, func(r *retry.R) {
+		_, node, err := state.GetNode(c1.config.NodeName, nil, "")
+		require.NoError(r, err)
+		require.NotNil(r, node, "client not registered")
+	})
+
+	// Simulate a node reaping
+	mems := s1.LANMembersInAgentPartition()
+	var c1mem serf.Member
+	for _, m := range mems {
+		if m.Name == c1.config.NodeName {
+			c1mem = m
+			c1mem.Status = StatusReap
+			break
+		}
+	}
+	s1.reconcileCh <- c1mem
+
+	// Should be deregistered; we have to poll quickly here because
+	// anti-entropy will put it back.
+	reaped := false
+	for start := time.Now(); time.Since(start) < 5*time.Second; {
+		_, node, err := state.GetNode(c1.config.NodeName, nil, "")
+		require.NoError(t, err)
+		if node == nil {
+			reaped = true
+			break
+		}
+	}
+	if !reaped {
+		t.Fatalf("client should not be registered")
+	}
+}
+
+func TestLeader_ReapOrLeftMember_IgnoreSelf(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+
+	run := func(t *testing.T, status serf.MemberStatus, nameFn func(string) string) {
+		t.Parallel()
+		dir1, s1 := testServerWithConfig(t, func(c *Config) {
+			c.PrimaryDatacenter = "dc1"
+			c.ACLsEnabled = true
+			c.ACLInitialManagementToken = "root"
+			c.ACLResolverSettings.ACLDefaultPolicy = "deny"
+		})
+		defer os.RemoveAll(dir1)
+		defer s1.Shutdown()
+
+		nodeName := s1.config.NodeName
+		if nameFn != nil {
+			nodeName = nameFn(nodeName)
+		}
+
+		state := s1.fsm.State()
+
+		// Should be registered
+		retry.Run(t, func(r *retry.R) {
+			_, node, err := state.GetNode(nodeName, nil, "")
+			require.NoError(r, err)
+			require.NotNil(r, node, "server not registered")
+		})
+
+		// Simulate THIS node reaping or leaving
+		mems := s1.LANMembersInAgentPartition()
+		var s1mem serf.Member
+		for _, m := range mems {
+			if strings.EqualFold(m.Name, nodeName) {
+				s1mem = m
+				s1mem.Status = status
+				s1mem.Name = nodeName
+				break
+			}
+		}
+		s1.reconcileCh <- s1mem
+
+		// Should NOT be deregistered; we have to poll quickly here because
+		// anti-entropy will put it back if it did get deleted.
+		reaped := false
+		for start := time.Now(); time.Since(start) < 5*time.Second; {
+			_, node, err := state.GetNode(nodeName, nil, "")
+			require.NoError(t, err)
+			if node == nil {
+				reaped = true
+				break
+			}
+		}
+		if reaped {
+			t.Fatalf("server should still be registered")
+		}
+	}
+
+	t.Run("original name", func(t *testing.T) {
+		t.Parallel()
+		t.Run("left", func(t *testing.T) {
+			run(t, serf.StatusLeft, nil)
+		})
+		t.Run("reap", func(t *testing.T) {
+			run(t, StatusReap, nil)
+		})
+	})
+
+	t.Run("uppercased name", func(t *testing.T) {
+		t.Parallel()
+		t.Run("left", func(t *testing.T) {
+			run(t, serf.StatusLeft, strings.ToUpper)
+		})
+		t.Run("reap", func(t *testing.T) {
+			run(t, StatusReap, strings.ToUpper)
+		})
+	})
+}
+
+func TestLeader_CheckServersMeta(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+	t.Parallel()
+
+	ports := freeport.GetN(t, 2) // s3 grpc, s3 grpc_tls
+
+	dir1, s1 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "allow"
+		c.Bootstrap = true
+	})
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	dir2, s2 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "allow"
+		c.Bootstrap = false
+	})
+	defer os.RemoveAll(dir2)
+	defer s2.Shutdown()
+
+	dir3, s3 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "allow"
+		c.Bootstrap = false
+		c.GRPCPort = ports[0]
+		c.GRPCTLSPort = ports[1]
+	})
+	defer os.RemoveAll(dir3)
+	defer s3.Shutdown()
+
+	// Try to join
+	joinLAN(t, s1, s2)
+	joinLAN(t, s1, s3)
+
+	testrpc.WaitForLeader(t, s1.RPC, "dc1")
+	testrpc.WaitForLeader(t, s2.RPC, "dc1")
+	testrpc.WaitForLeader(t, s3.RPC, "dc1")
+	state := s1.fsm.State()
+
+	consulService := &structs.NodeService{
+		ID:      "consul",
+		Service: "consul",
+	}
+	// s3 should be registered
+	retry.Run(t, func(r *retry.R) {
+		_, service, err := state.NodeService(nil, s3.config.NodeName, "consul", &consulService.EnterpriseMeta, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if service == nil {
+			r.Fatal("client not registered")
+		}
+		if service.Meta["non_voter"] != "false" {
+			r.Fatalf("Expected to be non_voter == false, was: %s", service.Meta["non_voter"])
+		}
+	})
+
+	member := serf.Member{}
+	for _, m := range s1.serfLAN.Members() {
+		if m.Name == s3.config.NodeName {
+			member = m
+			member.Tags = make(map[string]string)
+			for key, value := range m.Tags {
+				member.Tags[key] = value
+			}
+		}
+	}
+	if member.Name != s3.config.NodeName {
+		t.Fatal("could not find node in serf members")
+	}
+	versionToExpect := "19.7.9"
+
+	retry.Run(t, func(r *retry.R) {
+		// DEPRECATED - remove nonvoter tag in favor of read_replica in a future version of consul
+		member.Tags["nonvoter"] = "1"
+		member.Tags["read_replica"] = "1"
+		member.Tags["build"] = versionToExpect
+		err := s1.registrator.HandleAliveMember(member, nil, s1.joinConsulServer)
+		if err != nil {
+			r.Fatalf("Unexpected error :%v", err)
+		}
+		_, service, err := state.NodeService(nil, s3.config.NodeName, "consul", &consulService.EnterpriseMeta, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if service == nil {
+			r.Fatal("client not registered")
+		}
+		// DEPRECATED - remove non_voter in favor of read_replica in a future version of consul
+		if service.Meta["non_voter"] != "true" {
+			r.Fatalf("Expected to be non_voter == true, was: %s", service.Meta["non_voter"])
+		}
+		if service.Meta["read_replica"] != "true" {
+			r.Fatalf("Expected to be read_replica == true, was: %s", service.Meta["non_voter"])
+		}
+		newVersion := service.Meta["version"]
+		if newVersion != versionToExpect {
+			r.Fatalf("Expected version to be updated to %s, was %s", versionToExpect, newVersion)
+		}
+		grpcPort := service.Meta["grpc_port"]
+		if grpcPort != strconv.Itoa(ports[0]) {
+			r.Fatalf("Expected grpc port to be %d, was %s", ports[0], grpcPort)
+		}
+		grpcTLSPort := service.Meta["grpc_tls_port"]
+		if grpcTLSPort != strconv.Itoa(ports[1]) {
+			r.Fatalf("Expected grpc tls port to be %d, was %s", ports[1], grpcTLSPort)
+		}
+	})
+}
+
+func TestLeader_ReapServer(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+	dir1, s1 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "allow"
+		c.Bootstrap = true
+	})
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	dir2, s2 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "allow"
+		c.Bootstrap = false
+	})
+	defer os.RemoveAll(dir2)
+	defer s2.Shutdown()
+
+	dir3, s3 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "allow"
+		c.Bootstrap = false
+	})
+	defer os.RemoveAll(dir3)
+	defer s3.Shutdown()
+
+	// Try to join
+	joinLAN(t, s1, s2)
+	joinLAN(t, s1, s3)
+
+	testrpc.WaitForLeader(t, s1.RPC, "dc1")
+	testrpc.WaitForLeader(t, s2.RPC, "dc1")
+	testrpc.WaitForLeader(t, s3.RPC, "dc1")
+	state := s1.fsm.State()
+
+	// s3 should be registered
+	retry.Run(t, func(r *retry.R) {
+		_, node, err := state.GetNode(s3.config.NodeName, nil, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if node == nil {
+			r.Fatal("client not registered")
+		}
+	})
+
+	// call reconcileReaped with a map that does not contain s3
+	knownMembers := make(map[string]struct{})
+	knownMembers[s1.config.NodeName] = struct{}{}
+	knownMembers[s2.config.NodeName] = struct{}{}
+
+	err := s1.reconcileReaped(knownMembers, nil)
+
+	if err != nil {
+		t.Fatalf("Unexpected error :%v", err)
+	}
+	// s3 should be deregistered
+	retry.Run(t, func(r *retry.R) {
+		_, node, err := state.GetNode(s3.config.NodeName, nil, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if node != nil {
+			r.Fatalf("server with id %v should not be registered", s3.config.NodeID)
+		}
+	})
+
+}
+
+func TestLeader_Reconcile_ReapMember(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+	dir1, s1 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "deny"
+	})
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	testrpc.WaitForLeader(t, s1.RPC, "dc1")
+
+	// Register a non-existing member
+	dead := structs.RegisterRequest{
+		Datacenter: s1.config.Datacenter,
+		Node:       "no-longer-around",
+		Address:    "127.1.1.1",
+		Check: &structs.HealthCheck{
+			Node:    "no-longer-around",
+			CheckID: structs.SerfCheckID,
+			Name:    structs.SerfCheckName,
+			Status:  api.HealthCritical,
+		},
+		WriteRequest: structs.WriteRequest{
+			Token: "root",
+		},
+	}
+	var out struct{}
+	if err := s1.RPC(context.Background(), "Catalog.Register", &dead, &out); err != nil {
+		t.Fatalf("err: %v", err)
+	}
+
+	// Force a reconciliation
+	if err := s1.reconcile(); err != nil {
+		t.Fatalf("err: %v", err)
+	}
+
+	// Node should be gone
+	state := s1.fsm.State()
+	_, node, err := state.GetNode("no-longer-around", nil, "")
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+	if node != nil {
+		t.Fatalf("client registered")
+	}
+}
+
+func TestLeader_Reconcile(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+	dir1, s1 := testServerWithConfig(t, func(c *Config) {
+		c.PrimaryDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLInitialManagementToken = "root"
+		c.ACLResolverSettings.ACLDefaultPolicy = "deny"
+	})
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	dir2, c1 := testClient(t)
+	defer os.RemoveAll(dir2)
+	defer c1.Shutdown()
+
+	// Join before we have a leader, this should cause a reconcile!
+	joinLAN(t, c1, s1)
+
+	// Should not be registered
+	state := s1.fsm.State()
+	_, node, err := state.GetNode(c1.config.NodeName, nil, "")
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+	if node != nil {
+		t.Fatalf("client registered")
+	}
+
+	// Should be registered
+	retry.Run(t, func(r *retry.R) {
+		_, node, err := state.GetNode(c1.config.NodeName, nil, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if node == nil {
+			r.Fatal("client not registered")
+		}
+	})
+}
+
+func TestLeader_Reconcile_Races(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+	dir1, s1 := testServer(t)
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	testrpc.WaitForLeader(t, s1.RPC, "dc1")
+
+	dir2, c1 := testClient(t)
+	defer os.RemoveAll(dir2)
+	defer c1.Shutdown()
+
+	joinLAN(t, c1, s1)
+
+	// Wait for the server to reconcile the client and register it.
+	state := s1.fsm.State()
+	var nodeAddr string
+	retry.Run(t, func(r *retry.R) {
+		_, node, err := state.GetNode(c1.config.NodeName, nil, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if node == nil {
+			r.Fatal("client not registered")
+		}
+		nodeAddr = node.Address
+	})
+
+	// Add in some metadata via the catalog (as if the agent synced it
+	// there). We also set the serfHealth check to failing so the reconcile
+	// will attempt to flip it back
+	req := structs.RegisterRequest{
+		Datacenter: s1.config.Datacenter,
+		Node:       c1.config.NodeName,
+		ID:         c1.config.NodeID,
+		Address:    nodeAddr,
+		NodeMeta:   map[string]string{"hello": "world"},
+		Check: &structs.HealthCheck{
+			Node:    c1.config.NodeName,
+			CheckID: structs.SerfCheckID,
+			Name:    structs.SerfCheckName,
+			Status:  api.HealthCritical,
+			Output:  "",
+		},
+	}
+	var out struct{}
+	if err := s1.RPC(context.Background(), "Catalog.Register", &req, &out); err != nil {
+		t.Fatalf("err: %v", err)
+	}
+
+	// Force a reconcile and make sure the metadata stuck around.
+	if err := s1.reconcile(); err != nil {
+		t.Fatalf("err: %v", err)
+	}
+	_, node, err := state.GetNode(c1.config.NodeName, nil, "")
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+	if node == nil {
+		t.Fatalf("bad")
+	}
+	if hello, ok := node.Meta["hello"]; !ok || hello != "world" {
+		t.Fatalf("bad")
+	}
+
+	// Fail the member and wait for the health to go critical.
+	c1.Shutdown()
+	retry.Run(t, func(r *retry.R) {
+		_, checks, err := state.NodeChecks(nil, c1.config.NodeName, nil, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if len(checks) != 1 {
+			r.Fatalf("client missing check")
+		}
+		if got, want := checks[0].Status, api.HealthCritical; got != want {
+			r.Fatalf("got state %q want %q", got, want)
+		}
+	})
+
+	// Make sure the metadata didn't get clobbered.
+	_, node, err = state.GetNode(c1.config.NodeName, nil, "")
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+	if node == nil {
+		t.Fatalf("bad")
+	}
+	if hello, ok := node.Meta["hello"]; !ok || hello != "world" {
+		t.Fatalf("bad")
+	}
+}
+
+func TestLeader_LeftServer(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+	dir1, s1 := testServer(t)
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	dir2, s2 := testServerDCBootstrap(t, "dc1", false)
+	defer os.RemoveAll(dir2)
+	defer s2.Shutdown()
+
+	dir3, s3 := testServerDCBootstrap(t, "dc1", false)
+	defer os.RemoveAll(dir3)
+	defer s3.Shutdown()
+
+	// Put s1 last so we don't trigger a leader election.
+	servers := []*Server{s2, s3, s1}
+
+	// Try to join
+	joinLAN(t, s2, s1)
+	joinLAN(t, s3, s1)
+	for _, s := range servers {
+		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
+	}
+
+	// Kill any server
+	servers[0].Shutdown()
+
+	// Force remove the non-leader (transition to left state)
+	if err := servers[1].RemoveFailedNode(servers[0].config.NodeName, false, nil); err != nil {
+		t.Fatalf("err: %v", err)
+	}
+
+	// Wait until the remaining servers show only 2 peers.
+	for _, s := range servers[1:] {
+		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 2)) })
+	}
+	s1.Shutdown()
+}
+
+func TestLeader_LeftLeader(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+	dir1, s1 := testServer(t)
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	dir2, s2 := testServerDCBootstrap(t, "dc1", false)
+	defer os.RemoveAll(dir2)
+	defer s2.Shutdown()
+
+	dir3, s3 := testServerDCBootstrap(t, "dc1", false)
+	defer os.RemoveAll(dir3)
+	defer s3.Shutdown()
+	servers := []*Server{s1, s2, s3}
+
+	// Try to join
+	joinLAN(t, s2, s1)
+	joinLAN(t, s3, s1)
+
+	for _, s := range servers {
+		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
+	}
+
+	// Kill the leader!
+	var leader *Server
+	for _, s := range servers {
+		if s.IsLeader() {
+			leader = s
+			break
+		}
+	}
+	if leader == nil {
+		t.Fatalf("Should have a leader")
+	}
+	if !leader.isReadyForConsistentReads() {
+		t.Fatalf("Expected leader to be ready for consistent reads ")
+	}
+	leader.Leave()
+	if leader.isReadyForConsistentReads() {
+		t.Fatalf("Expected consistent read state to be false ")
+	}
+	leader.Shutdown()
+	time.Sleep(100 * time.Millisecond)
+
+	var remain *Server
+	for _, s := range servers {
+		if s == leader {
+			continue
+		}
+		remain = s
+		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 2)) })
+	}
+
+	// Verify the old leader is deregistered
+	state := remain.fsm.State()
+	retry.Run(t, func(r *retry.R) {
+		_, node, err := state.GetNode(leader.config.NodeName, nil, "")
+		if err != nil {
+			r.Fatalf("err: %v", err)
+		}
+		if node != nil {
+			r.Fatal("leader should be deregistered")
+		}
+	})
+}
+
+func TestLeader_MultiBootstrap(t *testing.T) {
+	t.Parallel()
+	dir1, s1 := testServer(t)
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	dir2, s2 := testServer(t)
+	defer os.RemoveAll(dir2)
+	defer s2.Shutdown()
+
+	servers := []*Server{s1, s2}
+
+	// Try to join
+	joinLAN(t, s2, s1)
+
+	for _, s := range servers {
+		retry.Run(t, func(r *retry.R) {
+			if got, want := len(s.serfLAN.Members()), 2; got != want {
+				r.Fatalf("got %d peers want %d", got, want)
+			}
+		})
+	}
+
+	// Ensure we don't have multiple raft peers
+	for _, s := range servers {
+		peers, _ := s.autopilot.NumVoters()
+		if peers != 1 {
+			t.Fatalf("should only have 1 raft peer!")
+		}
+	}
+}
@ -0,0 +1,407 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package consul

import (
	"context"
	"fmt"
	"strconv"
	"strings"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/serf/serf"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
	"google.golang.org/protobuf/testing/protocmp"
	"google.golang.org/protobuf/types/known/anypb"

	"github.com/hashicorp/consul/acl"
	"github.com/hashicorp/consul/agent/metadata"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/hashicorp/consul/internal/resource"
	pbcatalog "github.com/hashicorp/consul/proto-public/pbcatalog/v2beta1"
	"github.com/hashicorp/consul/proto-public/pbresource"
	"github.com/hashicorp/consul/types"
)

const (
	consulWorkloadPrefix = "consul-server-"
	consulPortNameServer = "server"
)

var _ ConsulRegistrator = (*V2ConsulRegistrator)(nil)

var resourceCmpOptions = []cmp.Option{
	protocmp.IgnoreFields(&pbresource.Resource{}, "status", "generation", "version"),
	protocmp.IgnoreFields(&pbresource.ID{}, "uid"),
	protocmp.Transform(),
	// Stringify any type passed to the sorter so that we can reliably compare most values.
	cmpopts.SortSlices(func(a, b any) bool { return fmt.Sprintf("%v", a) < fmt.Sprintf("%v", b) }),
}
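
// Illustrative sketch (not part of the original change): with the options
// above, cmp.Equal ignores the server-generated fields on a resource, so two
// logically identical resources compare equal even when the storage layer has
// stamped them differently:
//
//	a := &pbresource.Resource{Id: &pbresource.ID{Name: "w1", Uid: "111"}, Version: "1"}
//	b := &pbresource.Resource{Id: &pbresource.ID{Name: "w1", Uid: "222"}, Version: "9"}
//	equal := cmp.Equal(a, b, resourceCmpOptions...) // true: uid and version are ignored
//
// Test_ResourceCmpOptions_GeneratedFieldInsensitive in the accompanying test
// file exercises exactly this property.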

type V2ConsulRegistrator struct {
	Logger   hclog.Logger
	NodeName string
	EntMeta  *acl.EnterpriseMeta

	Client pbresource.ResourceServiceClient
}

// HandleAliveMember is used to ensure the server is registered as a Workload
// with a passing health check.
func (r V2ConsulRegistrator) HandleAliveMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta, joinServer func(m serf.Member, parts *metadata.Server) error) error {
	valid, parts := metadata.IsConsulServer(member)
	if !valid {
		return nil
	}

	if nodeEntMeta == nil {
		nodeEntMeta = structs.NodeEnterpriseMetaInDefaultPartition()
	}

	// Attempt to join the consul server, regardless of the existing catalog state
	if err := joinServer(member, parts); err != nil {
		return err
	}

	r.Logger.Info("member joined, creating catalog entries",
		"member", member.Name,
		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
	)

	workloadResource, err := r.createWorkloadFromMember(member, parts, nodeEntMeta)
	if err != nil {
		return err
	}

	// Check if the Workload already exists and if it's the same
	res, err := r.Client.Read(context.TODO(), &pbresource.ReadRequest{Id: workloadResource.Id})
	if err != nil && !grpcNotFoundErr(err) {
		return fmt.Errorf("error checking for existing Workload %s: %w", workloadResource.Id.Name, err)
	}

	if err == nil {
		existingWorkload := res.GetResource()

		r.Logger.Debug("existing Workload matching the member found",
			"member", member.Name,
			"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
		)

		// If the Workload is identical, move to updating the health status
		if cmp.Equal(workloadResource, existingWorkload, resourceCmpOptions...) {
			r.Logger.Debug("no updates to perform on member Workload",
				"member", member.Name,
				"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
			)
			goto HEALTHSTATUS
		}

		// If the existing Workload is different, add the existing Version into the patch for CAS write
		workloadResource.Id = existingWorkload.Id
		workloadResource.Version = existingWorkload.Version
	}

	if _, err := r.Client.Write(context.TODO(), &pbresource.WriteRequest{Resource: workloadResource}); err != nil {
		return fmt.Errorf("failed to write Workload %s: %w", workloadResource.Id.Name, err)
	}

	r.Logger.Info("updated consul Workload in catalog",
		"member", member.Name,
		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
	)

HEALTHSTATUS:
	hsResource, err := r.createHealthStatusFromMember(member, workloadResource.Id, true, nodeEntMeta)
	if err != nil {
		return err
	}

	// Check if the HealthStatus already exists and if it's the same
	res, err = r.Client.Read(context.TODO(), &pbresource.ReadRequest{Id: hsResource.Id})
	if err != nil && !grpcNotFoundErr(err) {
		return fmt.Errorf("error checking for existing HealthStatus %s: %w", hsResource.Id.Name, err)
	}

	if err == nil {
		existingHS := res.GetResource()

		r.Logger.Debug("existing HealthStatus matching the member found",
			"member", member.Name,
			"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
		)

		// If the HealthStatus is identical, we're done.
		if cmp.Equal(hsResource, existingHS, resourceCmpOptions...) {
			r.Logger.Debug("no updates to perform on member HealthStatus",
				"member", member.Name,
				"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
			)
			return nil
		}

		// If the existing HealthStatus is different, add the Version to the patch for CAS write.
		hsResource.Id = existingHS.Id
		hsResource.Version = existingHS.Version
	}

	if _, err := r.Client.Write(context.TODO(), &pbresource.WriteRequest{Resource: hsResource}); err != nil {
		return fmt.Errorf("failed to write HealthStatus %s: %w", hsResource.Id.Name, err)
	}
	r.Logger.Info("updated consul HealthStatus in catalog",
		"member", member.Name,
		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
	)
	return nil
}

func (r V2ConsulRegistrator) createWorkloadFromMember(member serf.Member, parts *metadata.Server, nodeEntMeta *acl.EnterpriseMeta) (*pbresource.Resource, error) {
	workloadMeta := map[string]string{
		"read_replica":          strconv.FormatBool(member.Tags["read_replica"] == "1"),
		"raft_version":          strconv.Itoa(parts.RaftVersion),
		"serf_protocol_current": strconv.FormatUint(uint64(member.ProtocolCur), 10),
		"serf_protocol_min":     strconv.FormatUint(uint64(member.ProtocolMin), 10),
		"serf_protocol_max":     strconv.FormatUint(uint64(member.ProtocolMax), 10),
		"version":               parts.Build.String(),
	}

	if parts.ExternalGRPCPort > 0 {
		workloadMeta["grpc_port"] = strconv.Itoa(parts.ExternalGRPCPort)
	}
	if parts.ExternalGRPCTLSPort > 0 {
		workloadMeta["grpc_tls_port"] = strconv.Itoa(parts.ExternalGRPCTLSPort)
	}

	workload := &pbcatalog.Workload{
		Addresses: []*pbcatalog.WorkloadAddress{
			{Host: member.Addr.String(), Ports: []string{consulPortNameServer}},
		},
		// Don't include identity since Consul is not routable through the mesh.
		// Don't include locality because these values are not passed along through serf, and they are probably
		// different from the leader's values.
		Ports: map[string]*pbcatalog.WorkloadPort{
			consulPortNameServer: {
				Port:     uint32(parts.Port),
				Protocol: pbcatalog.Protocol_PROTOCOL_TCP,
			},
			// TODO: add other agent ports
		},
	}

	workloadData, err := anypb.New(workload)
	if err != nil {
		return nil, fmt.Errorf("could not convert Workload to 'any' type: %w", err)
	}

	workloadId := &pbresource.ID{
		Name:    fmt.Sprintf("%s%s", consulWorkloadPrefix, types.NodeID(member.Tags["id"])),
		Type:    pbcatalog.WorkloadType,
		Tenancy: resource.DefaultNamespacedTenancy(),
	}
	workloadId.Tenancy.Partition = nodeEntMeta.PartitionOrDefault()

	return &pbresource.Resource{
		Id:       workloadId,
		Data:     workloadData,
		Metadata: workloadMeta,
	}, nil
}
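
// Worked example (illustrative, not part of the original change): a serf
// member carrying the tag id "72af047d-1857-2493-969e-53614a70b25a" maps to
// the Workload ID name
//
//	fmt.Sprintf("%s%s", consulWorkloadPrefix, "72af047d-1857-2493-969e-53614a70b25a")
//	// -> "consul-server-72af047d-1857-2493-969e-53614a70b25a"
//
// which is also the name used for the owned HealthStatus resource below, so
// the two resources for one server always share a name.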

func (r V2ConsulRegistrator) createHealthStatusFromMember(member serf.Member, workloadId *pbresource.ID, passing bool, nodeEntMeta *acl.EnterpriseMeta) (*pbresource.Resource, error) {
	hs := &pbcatalog.HealthStatus{
		Type:        string(structs.SerfCheckID),
		Description: structs.SerfCheckName,
	}

	if passing {
		hs.Status = pbcatalog.Health_HEALTH_PASSING
		hs.Output = structs.SerfCheckAliveOutput
	} else {
		hs.Status = pbcatalog.Health_HEALTH_CRITICAL
		hs.Output = structs.SerfCheckFailedOutput
	}

	hsData, err := anypb.New(hs)
	if err != nil {
		return nil, fmt.Errorf("could not convert HealthStatus to 'any' type: %w", err)
	}

	hsId := &pbresource.ID{
		Name:    fmt.Sprintf("%s%s", consulWorkloadPrefix, types.NodeID(member.Tags["id"])),
		Type:    pbcatalog.HealthStatusType,
		Tenancy: resource.DefaultNamespacedTenancy(),
	}
	hsId.Tenancy.Partition = nodeEntMeta.PartitionOrDefault()

	return &pbresource.Resource{
		Id:    hsId,
		Data:  hsData,
		Owner: workloadId,
	}, nil
}

// HandleFailedMember is used to mark the workload's associated HealthStatus as critical.
func (r V2ConsulRegistrator) HandleFailedMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta) error {
	if valid, _ := metadata.IsConsulServer(member); !valid {
		return nil
	}

	if nodeEntMeta == nil {
		nodeEntMeta = structs.NodeEnterpriseMetaInDefaultPartition()
	}

	r.Logger.Info("member failed",
		"member", member.Name,
		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
	)

	// Validate that the associated workload exists
	workloadId := &pbresource.ID{
		Name:    fmt.Sprintf("%s%s", consulWorkloadPrefix, types.NodeID(member.Tags["id"])),
		Type:    pbcatalog.WorkloadType,
		Tenancy: resource.DefaultNamespacedTenancy(),
	}
	workloadId.Tenancy.Partition = nodeEntMeta.PartitionOrDefault()

	res, err := r.Client.Read(context.TODO(), &pbresource.ReadRequest{Id: workloadId})
	if err != nil && !grpcNotFoundErr(err) {
		return fmt.Errorf("error checking for existing Workload %s: %w", workloadId.Name, err)
	}
	if grpcNotFoundErr(err) {
		r.Logger.Info("ignoring failed event for member because it does not exist in the catalog",
			"member", member.Name,
			"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
		)
		return nil
	}
	// Overwrite the workload ID with the one that has UID populated.
	existingWorkload := res.GetResource()

	hsResource, err := r.createHealthStatusFromMember(member, existingWorkload.Id, false, nodeEntMeta)
	if err != nil {
		return err
	}

	res, err = r.Client.Read(context.TODO(), &pbresource.ReadRequest{Id: hsResource.Id})
	if err != nil && !grpcNotFoundErr(err) {
		return fmt.Errorf("error checking for existing HealthStatus %s: %w", hsResource.Id.Name, err)
	}

	if err == nil {
		existingHS := res.GetResource()
		r.Logger.Debug("existing HealthStatus matching the member found",
			"member", member.Name,
			"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
		)

		// If the HealthStatus is identical, we're done.
		if cmp.Equal(hsResource, existingHS, resourceCmpOptions...) {
			r.Logger.Debug("no updates to perform on member HealthStatus",
				"member", member.Name,
				"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
			)
			return nil
		}

		// If the existing HealthStatus is different, add the Version to the patch for CAS write.
		hsResource.Id = existingHS.Id
		hsResource.Version = existingHS.Version
	}

	if _, err := r.Client.Write(context.TODO(), &pbresource.WriteRequest{Resource: hsResource}); err != nil {
		return fmt.Errorf("failed to write HealthStatus %s: %w", hsResource.Id.Name, err)
	}
	r.Logger.Info("updated consul HealthStatus in catalog",
		"member", member.Name,
		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
	)
	return nil
}

// HandleLeftMember is used to handle members that gracefully
// left. They are removed if necessary.
func (r V2ConsulRegistrator) HandleLeftMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta, removeServerFunc func(m serf.Member) error) error {
	return r.handleDeregisterMember("left", member, nodeEntMeta, removeServerFunc)
}

// HandleReapMember is used to handle members that have been
// reaped after a prolonged failure. They are removed from the catalog.
func (r V2ConsulRegistrator) HandleReapMember(member serf.Member, nodeEntMeta *acl.EnterpriseMeta, removeServerFunc func(m serf.Member) error) error {
	return r.handleDeregisterMember("reaped", member, nodeEntMeta, removeServerFunc)
}

// handleDeregisterMember is used to remove a member for a given reason
func (r V2ConsulRegistrator) handleDeregisterMember(reason string, member serf.Member, nodeEntMeta *acl.EnterpriseMeta, removeServerFunc func(m serf.Member) error) error {
	if valid, _ := metadata.IsConsulServer(member); !valid {
		return nil
	}

	if nodeEntMeta == nil {
		nodeEntMeta = structs.NodeEnterpriseMetaInDefaultPartition()
	}

	r.Logger.Info("removing member",
		"member", member.Name,
		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
		"reason", reason,
	)

	if err := removeServerFunc(member); err != nil {
		return err
	}

	// Do not remove ourselves. This can only happen if the current leader
	// is leaving. Instead, we should allow a follower to take over and
	// remove us later.
	if strings.EqualFold(member.Name, r.NodeName) &&
		strings.EqualFold(nodeEntMeta.PartitionOrDefault(), r.EntMeta.PartitionOrDefault()) {
		r.Logger.Warn("removing self should be done by follower",
			"name", r.NodeName,
			"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
			"reason", reason,
		)
		return nil
	}

	// Check if the workload exists
	workloadID := &pbresource.ID{
		Name:    fmt.Sprintf("%s%s", consulWorkloadPrefix, types.NodeID(member.Tags["id"])),
		Type:    pbcatalog.WorkloadType,
		Tenancy: resource.DefaultNamespacedTenancy(),
	}
	workloadID.Tenancy.Partition = nodeEntMeta.PartitionOrDefault()

	res, err := r.Client.Read(context.TODO(), &pbresource.ReadRequest{Id: workloadID})
	if err != nil && !grpcNotFoundErr(err) {
		return fmt.Errorf("error checking for existing Workload %s: %w", workloadID.Name, err)
	}
	if grpcNotFoundErr(err) {
		r.Logger.Info("ignoring reap event for member because it does not exist in the catalog",
			"member", member.Name,
			"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
		)
		return nil
	}
	existingWorkload := res.GetResource()

	// The HealthStatus should be reaped automatically
	if _, err := r.Client.Delete(context.TODO(), &pbresource.DeleteRequest{Id: existingWorkload.Id}); err != nil {
		return fmt.Errorf("failed to delete Workload %s: %w", existingWorkload.Id.Name, err)
	}
	r.Logger.Info("deleted consul Workload",
		"member", member.Name,
		"partition", getSerfMemberEnterpriseMeta(member).PartitionOrDefault(),
	)
	return err
}

func grpcNotFoundErr(err error) bool {
	if err == nil {
		return false
	}
	s, ok := status.FromError(err)
	return ok && s.Code() == codes.NotFound
}
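
// Usage sketch (illustrative, not part of the original change): the handlers
// above pair grpcNotFoundErr with the raw error check so that "not found" is
// treated as an expected state rather than a failure:
//
//	res, err := client.Read(ctx, &pbresource.ReadRequest{Id: id})
//	switch {
//	case err != nil && !grpcNotFoundErr(err): // real failure
//		return err
//	case grpcNotFoundErr(err): // resource absent: create it from scratch
//	default: // resource present: res.GetResource() carries the Version for a CAS write
//	}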
@ -0,0 +1,583 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package consul

import (
	"fmt"
	"net"
	"testing"

	"github.com/google/go-cmp/cmp"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/serf/serf"
	"github.com/stretchr/testify/mock"
	"github.com/stretchr/testify/require"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/anypb"

	"github.com/hashicorp/consul/acl"
	"github.com/hashicorp/consul/agent/metadata"
	"github.com/hashicorp/consul/agent/structs"
	mockpbresource "github.com/hashicorp/consul/grpcmocks/proto-public/pbresource"
	"github.com/hashicorp/consul/internal/resource"
	pbcatalog "github.com/hashicorp/consul/proto-public/pbcatalog/v2beta1"
	"github.com/hashicorp/consul/proto-public/pbresource"
)

var (
	fakeWrappedErr = fmt.Errorf("fake test error")
)

type testCase struct {
	name             string
	member           serf.Member
	nodeNameOverride string // This is used in the HandleLeftMember test to avoid deregistering ourselves

	existingWorkload  *pbresource.Resource
	workloadReadErr   bool
	workloadWriteErr  bool
	workloadDeleteErr bool

	existingHealthStatus *pbresource.Resource
	healthstatusReadErr  bool
	healthstatusWriteErr bool

	mutatedWorkload     *pbresource.Resource // leaving one of these out means the mock expects not to have a write/delete called
	mutatedHealthStatus *pbresource.Resource
	expErr              string
}
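
// Illustrative sketch (not part of the original change): a minimal happy-path
// table entry only needs a member and the resources the mock should see
// written; omitted mutated* fields mean "expect no write/delete":
//
//	tc := testCase{
//		name:                "New alive member",
//		member:              getTestSerfMember(serf.StatusAlive),
//		mutatedWorkload:     getTestWorkload(t),
//		mutatedHealthStatus: getTestHealthStatus(t, true),
//	}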

func Test_HandleAliveMember(t *testing.T) {
	t.Parallel()

	run := func(t *testing.T, tt testCase) {
		client := mockpbresource.NewResourceServiceClient(t)
		mockClient := client.EXPECT()

		// Build mock expectations based on the order of HandleAliveMember resource calls
		setupReadExpectation(t, mockClient, getTestWorkloadId(), tt.existingWorkload, tt.workloadReadErr)
		setupWriteExpectation(t, mockClient, tt.mutatedWorkload, tt.workloadWriteErr)
		if !tt.workloadReadErr && !tt.workloadWriteErr {
			// We expect to bail before this read if there is an error earlier in the function
			setupReadExpectation(t, mockClient, getTestHealthstatusId(), tt.existingHealthStatus, tt.healthstatusReadErr)
		}
		setupWriteExpectation(t, mockClient, tt.mutatedHealthStatus, tt.healthstatusWriteErr)

		registrator := V2ConsulRegistrator{
			Logger:   hclog.New(&hclog.LoggerOptions{}),
			NodeName: "test-server-1",
			Client:   client,
		}

		// Mock join function
		var joinMockCalled bool
		joinMock := func(_ serf.Member, _ *metadata.Server) error {
			joinMockCalled = true
			return nil
		}

		err := registrator.HandleAliveMember(tt.member, acl.DefaultEnterpriseMeta(), joinMock)
		if tt.expErr != "" {
			require.Contains(t, err.Error(), tt.expErr)
		} else {
			require.NoError(t, err)
		}
		require.True(t, joinMockCalled, "the mock join function was not called")
	}

	tests := []testCase{
		{
			name:                "New alive member",
			member:              getTestSerfMember(serf.StatusAlive),
			mutatedWorkload:     getTestWorkload(t),
			mutatedHealthStatus: getTestHealthStatus(t, true),
		},
		{
			name:                 "No updates needed",
			member:               getTestSerfMember(serf.StatusAlive),
			existingWorkload:     getTestWorkload(t),
			existingHealthStatus: getTestHealthStatus(t, true),
		},
		{
			name:                 "Existing Workload and HS need to be updated",
			member:               getTestSerfMember(serf.StatusAlive),
			existingWorkload:     getTestWorkloadWithPort(t, 8301),
			existingHealthStatus: getTestHealthStatus(t, false),
			mutatedWorkload:      getTestWorkload(t),
			mutatedHealthStatus:  getTestHealthStatus(t, true),
		},
		{
			name:                 "Only the HS needs to be updated",
			member:               getTestSerfMember(serf.StatusAlive),
			existingWorkload:     getTestWorkload(t),
			existingHealthStatus: getTestHealthStatus(t, false),
			mutatedHealthStatus:  getTestHealthStatus(t, true),
		},
		{
			name:            "Error reading Workload",
			member:          getTestSerfMember(serf.StatusAlive),
			workloadReadErr: true,
			expErr:          "error checking for existing Workload",
		},
		{
			name:             "Error writing Workload",
			member:           getTestSerfMember(serf.StatusAlive),
			workloadWriteErr: true,
			mutatedWorkload:  getTestWorkload(t),
			expErr:           "failed to write Workload",
		},
		{
			name:                "Error reading HealthStatus",
			member:              getTestSerfMember(serf.StatusAlive),
			healthstatusReadErr: true,
			mutatedWorkload:     getTestWorkload(t),
			expErr:              "error checking for existing HealthStatus",
		},
		{
			name:                 "Error writing HealthStatus",
			member:               getTestSerfMember(serf.StatusAlive),
			healthstatusWriteErr: true,
			mutatedWorkload:      getTestWorkload(t),
			mutatedHealthStatus:  getTestHealthStatus(t, true),
			expErr:               "failed to write HealthStatus",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			run(t, tt)
		})
	}
}

func Test_HandleFailedMember(t *testing.T) {
	t.Parallel()

	run := func(t *testing.T, tt testCase) {
		client := mockpbresource.NewResourceServiceClient(t)
		mockClient := client.EXPECT()

		// Build mock expectations based on the order of HandleFailedMember resource calls
		setupReadExpectation(t, mockClient, getTestWorkloadId(), tt.existingWorkload, tt.workloadReadErr)
		if !tt.workloadReadErr && tt.existingWorkload != nil {
			// We expect to bail before this read if there is an error earlier in the function or there is no workload
			setupReadExpectation(t, mockClient, getTestHealthstatusId(), tt.existingHealthStatus, tt.healthstatusReadErr)
		}
		setupWriteExpectation(t, mockClient, tt.mutatedHealthStatus, tt.healthstatusWriteErr)

		registrator := V2ConsulRegistrator{
			Logger:   hclog.New(&hclog.LoggerOptions{}),
			NodeName: "test-server-1",
			Client:   client,
		}

		err := registrator.HandleFailedMember(tt.member, acl.DefaultEnterpriseMeta())
		if tt.expErr != "" {
			require.Contains(t, err.Error(), tt.expErr)
		} else {
			require.NoError(t, err)
		}
	}

	tests := []testCase{
		{
			name:                "Update non-existent HealthStatus",
			member:              getTestSerfMember(serf.StatusFailed),
			existingWorkload:    getTestWorkload(t),
			mutatedHealthStatus: getTestHealthStatus(t, false),
		},
		{
			name:   "Underlying Workload does not exist",
			member: getTestSerfMember(serf.StatusFailed),
		},
		{
			name:                 "Update an existing HealthStatus",
			member:               getTestSerfMember(serf.StatusFailed),
			existingWorkload:     getTestWorkload(t),
			existingHealthStatus: getTestHealthStatus(t, true),
			mutatedHealthStatus:  getTestHealthStatus(t, false),
		},
		{
			name:                 "HealthStatus is already critical - no updates needed",
			member:               getTestSerfMember(serf.StatusFailed),
			existingWorkload:     getTestWorkload(t),
			existingHealthStatus: getTestHealthStatus(t, false),
		},
		{
			name:            "Error reading Workload",
			member:          getTestSerfMember(serf.StatusFailed),
			workloadReadErr: true,
			expErr:          "error checking for existing Workload",
		},
		{
			name:                "Error reading HealthStatus",
			member:              getTestSerfMember(serf.StatusFailed),
			existingWorkload:    getTestWorkload(t),
			healthstatusReadErr: true,
			expErr:              "error checking for existing HealthStatus",
		},
		{
			name:                 "Error writing HealthStatus",
			member:               getTestSerfMember(serf.StatusFailed),
			existingWorkload:     getTestWorkload(t),
			healthstatusWriteErr: true,
			mutatedHealthStatus:  getTestHealthStatus(t, false),
			expErr:               "failed to write HealthStatus",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			run(t, tt)
		})
	}
}

// Test_HandleLeftMember also tests HandleReapMember, which shares the same core logic with some different logs.
func Test_HandleLeftMember(t *testing.T) {
	t.Parallel()

	run := func(t *testing.T, tt testCase) {
		client := mockpbresource.NewResourceServiceClient(t)
		mockClient := client.EXPECT()

		// Build mock expectations based on the order of HandleLeftMember resource calls
		// We check for the override, which we use to skip self de-registration
		if tt.nodeNameOverride == "" {
			setupReadExpectation(t, mockClient, getTestWorkloadId(), tt.existingWorkload, tt.workloadReadErr)
			if tt.existingWorkload != nil && !tt.workloadReadErr {
				setupDeleteExpectation(t, mockClient, tt.mutatedWorkload, tt.workloadDeleteErr)
			}
		}

		nodeName := "test-server-2" // This is not the same as the serf node so we don't deregister ourselves.
		if tt.nodeNameOverride != "" {
			nodeName = tt.nodeNameOverride
		}

		registrator := V2ConsulRegistrator{
			Logger:   hclog.New(&hclog.LoggerOptions{}),
			NodeName: nodeName, // We change this so that we don't deregister ourselves
			Client:   client,
		}

		// Mock remove function
		var removeMockCalled bool
		removeMock := func(_ serf.Member) error {
			removeMockCalled = true
			return nil
		}

		err := registrator.HandleLeftMember(tt.member, acl.DefaultEnterpriseMeta(), removeMock)
		if tt.expErr != "" {
			require.Contains(t, err.Error(), tt.expErr)
		} else {
			require.NoError(t, err)
		}
		require.True(t, removeMockCalled, "the mock remove function was not called")
	}

	tests := []testCase{
		{
			name:             "Remove member",
			member:           getTestSerfMember(serf.StatusAlive),
			existingWorkload: getTestWorkload(t),
			mutatedWorkload:  getTestWorkload(t),
		},
		{
			name:             "Don't deregister ourselves",
			member:           getTestSerfMember(serf.StatusAlive),
			nodeNameOverride: "test-server-1",
		},
		{
			name:   "Don't do anything if the Workload is already gone",
			member: getTestSerfMember(serf.StatusAlive),
		},
		{
			name:             "Remove member regardless of Workload payload",
			member:           getTestSerfMember(serf.StatusAlive),
			existingWorkload: getTestWorkloadWithPort(t, 8301),
			mutatedWorkload:  getTestWorkload(t),
		},
		{
			name:            "Error reading Workload",
			member:          getTestSerfMember(serf.StatusAlive),
			workloadReadErr: true,
			expErr:          "error checking for existing Workload",
		},
		{
			name:              "Error deleting Workload",
			member:            getTestSerfMember(serf.StatusAlive),
			workloadDeleteErr: true,
			existingWorkload:  getTestWorkloadWithPort(t, 8301),
			mutatedWorkload:   getTestWorkload(t),
			expErr:            "failed to delete Workload",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			run(t, tt)
		})
	}
}

func setupReadExpectation(
	t *testing.T,
	mockClient *mockpbresource.ResourceServiceClient_Expecter,
	expectedId *pbresource.ID,
	existingResource *pbresource.Resource,
	sendErr bool) {

	if sendErr {
		mockClient.Read(mock.Anything, mock.Anything).
			Return(nil, fakeWrappedErr).
			Once().
			Run(func(args mock.Arguments) {
				req := args.Get(1).(*pbresource.ReadRequest)
				require.True(t, proto.Equal(expectedId, req.Id))
			})
	} else if existingResource != nil {
		mockClient.Read(mock.Anything, mock.Anything).
			Return(&pbresource.ReadResponse{
				Resource: existingResource,
			}, nil).
			Once().
			Run(func(args mock.Arguments) {
				req := args.Get(1).(*pbresource.ReadRequest)
				require.True(t, proto.Equal(expectedId, req.Id))
			})
	} else {
		mockClient.Read(mock.Anything, mock.Anything).
			Return(nil, status.Error(codes.NotFound, "not found")).
			Once().
			Run(func(args mock.Arguments) {
				req := args.Get(1).(*pbresource.ReadRequest)
				require.True(t, proto.Equal(expectedId, req.Id))
			})
	}
}

func setupWriteExpectation(
	t *testing.T,
	mockClient *mockpbresource.ResourceServiceClient_Expecter,
	expectedResource *pbresource.Resource,
	sendErr bool) {

	// If there is no expected resource, we take that to mean we don't expect any client writes.
	if expectedResource == nil {
		return
	}

	if sendErr {
		mockClient.Write(mock.Anything, mock.Anything).
			Return(nil, fakeWrappedErr).
			Once().
			Run(func(args mock.Arguments) {
				req := args.Get(1).(*pbresource.WriteRequest)
				require.True(t, proto.Equal(expectedResource, req.Resource))
			})
	} else {
		mockClient.Write(mock.Anything, mock.Anything).
			Return(nil, nil).
			Once().
			Run(func(args mock.Arguments) {
				req := args.Get(1).(*pbresource.WriteRequest)
				require.True(t, proto.Equal(expectedResource, req.Resource))
			})
	}
}

func setupDeleteExpectation(
	t *testing.T,
	mockClient *mockpbresource.ResourceServiceClient_Expecter,
	expectedResource *pbresource.Resource,
	sendErr bool) {

	expectedId := expectedResource.GetId()

	if sendErr {
		mockClient.Delete(mock.Anything, mock.Anything).
			Return(nil, fakeWrappedErr).
			Once().
			Run(func(args mock.Arguments) {
				req := args.Get(1).(*pbresource.DeleteRequest)
				require.True(t, proto.Equal(expectedId, req.Id))
			})
	} else {
		mockClient.Delete(mock.Anything, mock.Anything).
			Return(nil, nil).
			Once().
			Run(func(args mock.Arguments) {
				req := args.Get(1).(*pbresource.DeleteRequest)
				require.True(t, proto.Equal(expectedId, req.Id))
			})
	}
}

func getTestWorkload(t *testing.T) *pbresource.Resource {
	return getTestWorkloadWithPort(t, 8300)
}

func getTestWorkloadWithPort(t *testing.T, port int) *pbresource.Resource {
	workload := &pbcatalog.Workload{
		Addresses: []*pbcatalog.WorkloadAddress{
			{Host: "127.0.0.1", Ports: []string{consulPortNameServer}},
		},
		Ports: map[string]*pbcatalog.WorkloadPort{
			consulPortNameServer: {
				Port:     uint32(port),
				Protocol: pbcatalog.Protocol_PROTOCOL_TCP,
			},
		},
	}
	data, err := anypb.New(workload)
	require.NoError(t, err)

	return &pbresource.Resource{
		Id:   getTestWorkloadId(),
		Data: data,
		Metadata: map[string]string{
			"read_replica":          "false",
			"raft_version":          "3",
			"serf_protocol_current": "2",
			"serf_protocol_min":     "1",
			"serf_protocol_max":     "5",
			"version":               "1.18.0",
			"grpc_port":             "8502",
		},
	}
}

func getTestWorkloadId() *pbresource.ID {
	return &pbresource.ID{
		Tenancy: resource.DefaultNamespacedTenancy(),
		Type:    pbcatalog.WorkloadType,
		Name:    "consul-server-72af047d-1857-2493-969e-53614a70b25a",
	}
}

func getTestHealthStatus(t *testing.T, passing bool) *pbresource.Resource {
	healthStatus := &pbcatalog.HealthStatus{
		Type:        string(structs.SerfCheckID),
		Description: structs.SerfCheckName,
	}

	if passing {
		healthStatus.Status = pbcatalog.Health_HEALTH_PASSING
		healthStatus.Output = structs.SerfCheckAliveOutput
	} else {
		healthStatus.Status = pbcatalog.Health_HEALTH_CRITICAL
		healthStatus.Output = structs.SerfCheckFailedOutput
	}

	data, err := anypb.New(healthStatus)
	require.NoError(t, err)

	return &pbresource.Resource{
		Id:    getTestHealthstatusId(),
		Data:  data,
		Owner: getTestWorkloadId(),
	}
}

func getTestHealthstatusId() *pbresource.ID {
	return &pbresource.ID{
		Tenancy: resource.DefaultNamespacedTenancy(),
		Type:    pbcatalog.HealthStatusType,
		Name:    "consul-server-72af047d-1857-2493-969e-53614a70b25a",
	}
}

func getTestSerfMember(status serf.MemberStatus) serf.Member {
	return serf.Member{
		Name: "test-server-1",
		Addr: net.ParseIP("127.0.0.1"),
		Port: 8300,
		// representative tags from a local dev deployment of ENT
		Tags: map[string]string{
			"vsn_min":       "2",
			"vsn":           "2",
			"acls":          "1",
			"ft_si":         "1",
			"raft_vsn":      "3",
			"grpc_port":     "8502",
			"wan_join_port": "8500",
			"dc":            "dc1",
			"segment":       "",
			"id":            "72af047d-1857-2493-969e-53614a70b25a",
			"ft_admpart":    "1",
			"role":          "consul",
			"build":         "1.18.0",
			"ft_ns":         "1",
			"vsn_max":       "3",
			"bootstrap":     "1",
			"expect":        "1",
			"port":          "8300",
		},
		Status:      status,
		ProtocolMin: 1,
		ProtocolMax: 5,
		ProtocolCur: 2,
		DelegateMin: 2,
		DelegateMax: 5,
		DelegateCur: 4,
	}
}
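
// How the tags above surface in the catalog (illustrative, not part of the
// original change): createWorkloadFromMember derives the Workload metadata
// from the member, so this fixture would yield, among others:
//
//	"read_replica":          "false"  // the "read_replica" tag is absent, i.e. not "1"
//	"raft_version":          "3"      // from the "raft_vsn" tag
//	"serf_protocol_current": "2"      // from ProtocolCur
//	"grpc_port":             "8502"   // from the "grpc_port" tag
//
// matching the metadata asserted in getTestWorkloadWithPort.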

// Test_ResourceCmpOptions_GeneratedFieldInsensitive makes sure our protocmp options are working as expected.
func Test_ResourceCmpOptions_GeneratedFieldInsensitive(t *testing.T) {
	t.Parallel()

	res1 := getTestWorkload(t)
	res2 := getTestWorkload(t)

	// Modify the generated fields
	res2.Id.Uid = "123456"
	res2.Version = "789"
	res2.Generation = "millenial"
	res2.Status = map[string]*pbresource.Status{
		"foo": {ObservedGeneration: "124"},
	}

	require.True(t, cmp.Equal(res1, res2, resourceCmpOptions...))

	res1.Metadata["foo"] = "bar"

	require.False(t, cmp.Equal(res1, res2, resourceCmpOptions...))
}

// Test gRPC Error Codes Conditions
func Test_grpcNotFoundErr(t *testing.T) {
	t.Parallel()
	tests := []struct {
		name     string
		err      error
		expected bool
	}{
		{
			name: "Nil Error",
		},
		{
			name: "Nonsense Error",
			err:  fmt.Errorf("boooooo!"),
		},
		{
			name: "gRPC Permission Denied Error",
			err:  status.Error(codes.PermissionDenied, "permission denied is not NotFound"),
		},
		{
			name:     "gRPC NotFound Error",
			err:      status.Error(codes.NotFound, "bingo: not found"),
			expected: true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			require.Equal(t, tt.expected, grpcNotFoundErr(tt.err))
		})
	}
}
@ -10,7 +10,6 @@ import (
 	"fmt"
 	"io"
 	"os"
-	"strconv"
 	"strings"
 	"testing"
 	"time"
@ -24,878 +23,73 @@ import (
|
||||||
msgpackrpc "github.com/hashicorp/consul-net-rpc/net-rpc-msgpackrpc"
|
msgpackrpc "github.com/hashicorp/consul-net-rpc/net-rpc-msgpackrpc"
|
||||||
|
|
||||||
"github.com/hashicorp/consul/acl"
|
"github.com/hashicorp/consul/acl"
|
||||||
|
"github.com/hashicorp/consul/agent/leafcert"
|
||||||
"github.com/hashicorp/consul/agent/structs"
|
"github.com/hashicorp/consul/agent/structs"
|
||||||
tokenStore "github.com/hashicorp/consul/agent/token"
|
tokenStore "github.com/hashicorp/consul/agent/token"
|
||||||
"github.com/hashicorp/consul/api"
|
"github.com/hashicorp/consul/api"
|
||||||
"github.com/hashicorp/consul/sdk/freeport"
|
"github.com/hashicorp/consul/internal/resource"
|
||||||
|
pbcatalog "github.com/hashicorp/consul/proto-public/pbcatalog/v2beta1"
|
||||||
|
"github.com/hashicorp/consul/proto-public/pbresource"
|
||||||
"github.com/hashicorp/consul/sdk/testutil"
|
"github.com/hashicorp/consul/sdk/testutil"
|
||||||
"github.com/hashicorp/consul/sdk/testutil/retry"
|
"github.com/hashicorp/consul/sdk/testutil/retry"
|
||||||
"github.com/hashicorp/consul/testrpc"
|
"github.com/hashicorp/consul/testrpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestLeader_RegisterMember(t *testing.T) {
|
func enableV2(t *testing.T) func(deps *Deps) {
|
||||||
if testing.Short() {
|
return func(deps *Deps) {
|
||||||
t.Skip("too slow for testing.Short")
|
deps.Experiments = []string{"resource-apis"}
|
||||||
}
|
m, _ := leafcert.NewTestManager(t, nil)
|
||||||
|
deps.LeafCertManager = m
|
||||||
t.Parallel()
|
|
||||||
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
||||||
c.PrimaryDatacenter = "dc1"
|
|
||||||
c.ACLsEnabled = true
|
|
||||||
c.ACLInitialManagementToken = "root"
|
|
||||||
c.ACLResolverSettings.ACLDefaultPolicy = "deny"
|
|
||||||
})
|
|
||||||
defer os.RemoveAll(dir1)
|
|
||||||
defer s1.Shutdown()
|
|
||||||
|
|
||||||
dir2, c1 := testClient(t)
|
|
||||||
defer os.RemoveAll(dir2)
|
|
||||||
defer c1.Shutdown()
|
|
||||||
|
|
||||||
// Try to join
|
|
||||||
joinLAN(t, c1, s1)
|
|
||||||
|
|
||||||
testrpc.WaitForLeader(t, s1.RPC, "dc1")
|
|
||||||
|
|
||||||
// Client should be registered
|
|
||||||
state := s1.fsm.State()
|
|
||||||
retry.Run(t, func(r *retry.R) {
|
|
||||||
_, node, err := state.GetNode(c1.config.NodeName, nil, "")
|
|
||||||
if err != nil {
|
|
||||||
r.Fatalf("err: %v", err)
|
|
||||||
}
|
|
||||||
if node == nil {
|
|
||||||
r.Fatal("client not registered")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// Should have a check
|
|
||||||
_, checks, err := state.NodeChecks(nil, c1.config.NodeName, nil, "")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("err: %v", err)
|
|
||||||
}
|
|
||||||
if len(checks) != 1 {
|
|
||||||
t.Fatalf("client missing check")
|
|
||||||
}
|
|
||||||
if checks[0].CheckID != structs.SerfCheckID {
|
|
||||||
t.Fatalf("bad check: %v", checks[0])
|
|
||||||
}
|
|
||||||
if checks[0].Name != structs.SerfCheckName {
|
|
||||||
t.Fatalf("bad check: %v", checks[0])
|
|
||||||
}
|
|
||||||
if checks[0].Status != api.HealthPassing {
|
|
||||||
t.Fatalf("bad check: %v", checks[0])
|
|
||||||
}
|
|
||||||
|
|
||||||
// Server should be registered
|
|
||||||
retry.Run(t, func(r *retry.R) {
|
|
||||||
_, node, err := state.GetNode(s1.config.NodeName, nil, "")
|
|
||||||
if err != nil {
|
|
||||||
r.Fatalf("err: %v", err)
|
|
||||||
}
|
|
||||||
if node == nil {
|
|
||||||
r.Fatalf("server not registered")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// Service should be registered
|
|
||||||
_, services, err := state.NodeServices(nil, s1.config.NodeName, nil, "")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("err: %v", err)
|
|
||||||
}
|
|
||||||
if _, ok := services.Services["consul"]; !ok {
|
|
||||||
t.Fatalf("consul service not registered: %v", services)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLeader_FailedMember(t *testing.T) {
|
// Test that Consul service is created in V2.
|
||||||
if testing.Short() {
|
// In V1, the service is implicitly created - this is covered in leader_registrator_v1_test.go
|
||||||
t.Skip("too slow for testing.Short")
|
func Test_InitConsulService(t *testing.T) {
|
||||||
}
|
|
||||||
|
|
||||||
t.Parallel()
|
|
||||||
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
||||||
c.PrimaryDatacenter = "dc1"
|
|
||||||
c.ACLsEnabled = true
|
|
||||||
c.ACLInitialManagementToken = "root"
|
|
||||||
c.ACLResolverSettings.ACLDefaultPolicy = "deny"
|
|
||||||
})
|
|
||||||
defer os.RemoveAll(dir1)
|
|
||||||
defer s1.Shutdown()
|
|
||||||
|
|
||||||
dir2, c1 := testClient(t)
|
|
||||||
defer os.RemoveAll(dir2)
|
|
||||||
defer c1.Shutdown()
|
|
||||||
|
|
||||||
testrpc.WaitForLeader(t, s1.RPC, "dc1")
|
|
||||||
|
|
||||||
// Try to join
|
|
||||||
joinLAN(t, c1, s1)
|
|
||||||
|
|
||||||
// Fail the member
|
|
||||||
c1.Shutdown()
|
|
||||||
|
|
||||||
// Should be registered
|
|
||||||
state := s1.fsm.State()
|
|
||||||
retry.Run(t, func(r *retry.R) {
|
|
||||||
_, node, err := state.GetNode(c1.config.NodeName, nil, "")
|
|
||||||
if err != nil {
|
|
||||||
r.Fatalf("err: %v", err)
|
|
||||||
}
|
|
||||||
if node == nil {
|
|
||||||
r.Fatal("client not registered")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// Should have a check
|
|
||||||
_, checks, err := state.NodeChecks(nil, c1.config.NodeName, nil, "")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("err: %v", err)
|
|
||||||
}
|
|
||||||
if len(checks) != 1 {
|
|
||||||
t.Fatalf("client missing check")
|
|
||||||
}
|
|
||||||
if checks[0].CheckID != structs.SerfCheckID {
|
|
||||||
t.Fatalf("bad check: %v", checks[0])
|
|
||||||
}
|
|
||||||
if checks[0].Name != structs.SerfCheckName {
|
|
||||||
t.Fatalf("bad check: %v", checks[0])
|
|
||||||
}
|
|
||||||
|
|
||||||
retry.Run(t, func(r *retry.R) {
|
|
||||||
_, checks, err = state.NodeChecks(nil, c1.config.NodeName, nil, "")
|
|
||||||
if err != nil {
|
|
||||||
r.Fatalf("err: %v", err)
|
|
||||||
}
|
|
||||||
if len(checks) != 1 {
|
|
||||||
r.Fatalf("client missing check")
|
|
||||||
}
|
|
||||||
if got, want := checks[0].Status, api.HealthCritical; got != want {
|
|
||||||
r.Fatalf("got status %q want %q", got, want)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLeader_LeftMember(t *testing.T) {
|
|
||||||
if testing.Short() {
|
|
||||||
t.Skip("too slow for testing.Short")
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Parallel()
|
|
||||||
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
||||||
c.PrimaryDatacenter = "dc1"
|
|
||||||
c.ACLsEnabled = true
|
|
||||||
c.ACLInitialManagementToken = "root"
|
|
||||||
c.ACLResolverSettings.ACLDefaultPolicy = "deny"
|
|
||||||
})
|
|
||||||
defer os.RemoveAll(dir1)
|
|
||||||
defer s1.Shutdown()
|
|
||||||
|
|
||||||
dir2, c1 := testClient(t)
|
|
||||||
defer os.RemoveAll(dir2)
|
|
||||||
defer c1.Shutdown()
|
|
||||||
|
|
||||||
// Try to join
|
|
||||||
joinLAN(t, c1, s1)
|
|
||||||
|
|
||||||
state := s1.fsm.State()
|
|
||||||
|
|
||||||
// Should be registered
|
|
||||||
retry.Run(t, func(r *retry.R) {
|
|
||||||
_, node, err := state.GetNode(c1.config.NodeName, nil, "")
|
|
||||||
require.NoError(r, err)
|
|
||||||
require.NotNil(r, node, "client not registered")
|
|
||||||
})
|
|
||||||
|
|
||||||
// Node should leave
|
|
||||||
c1.Leave()
|
|
||||||
c1.Shutdown()
|
|
||||||
|
|
||||||
// Should be deregistered
|
|
||||||
retry.Run(t, func(r *retry.R) {
|
|
||||||
_, node, err := state.GetNode(c1.config.NodeName, nil, "")
|
|
||||||
require.NoError(r, err)
|
|
||||||
require.Nil(r, node, "client still registered")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLeader_ReapMember(t *testing.T) {
|
|
||||||
if testing.Short() {
|
|
||||||
t.Skip("too slow for testing.Short")
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Parallel()
|
|
||||||
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
||||||
c.PrimaryDatacenter = "dc1"
|
|
||||||
c.ACLsEnabled = true
|
|
||||||
c.ACLInitialManagementToken = "root"
|
|
||||||
c.ACLResolverSettings.ACLDefaultPolicy = "deny"
|
|
||||||
})
|
|
||||||
defer os.RemoveAll(dir1)
|
|
||||||
defer s1.Shutdown()
|
|
||||||
|
|
||||||
dir2, c1 := testClient(t)
|
|
||||||
defer os.RemoveAll(dir2)
|
|
||||||
defer c1.Shutdown()
|
|
||||||
|
|
||||||
// Try to join
|
|
||||||
joinLAN(t, c1, s1)
|
|
||||||
|
|
||||||
state := s1.fsm.State()
|
|
||||||
|
|
||||||
// Should be registered
|
|
||||||
retry.Run(t, func(r *retry.R) {
|
|
||||||
_, node, err := state.GetNode(c1.config.NodeName, nil, "")
|
|
||||||
require.NoError(r, err)
|
|
||||||
require.NotNil(r, node, "client not registered")
|
|
||||||
})
|
|
||||||
|
|
||||||
// Simulate a node reaping
|
|
||||||
mems := s1.LANMembersInAgentPartition()
|
|
||||||
var c1mem serf.Member
|
|
||||||
for _, m := range mems {
|
|
||||||
if m.Name == c1.config.NodeName {
|
|
||||||
c1mem = m
|
|
||||||
c1mem.Status = StatusReap
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s1.reconcileCh <- c1mem
|
|
||||||
|
|
||||||
// Should be deregistered; we have to poll quickly here because
|
|
||||||
// anti-entropy will put it back.
|
|
||||||
reaped := false
|
|
||||||
for start := time.Now(); time.Since(start) < 5*time.Second; {
|
|
||||||
_, node, err := state.GetNode(c1.config.NodeName, nil, "")
|
|
||||||
require.NoError(t, err)
|
|
||||||
if node == nil {
|
|
||||||
reaped = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !reaped {
|
|
||||||
t.Fatalf("client should not be registered")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLeader_ReapOrLeftMember_IgnoreSelf(t *testing.T) {
|
|
||||||
if testing.Short() {
|
if testing.Short() {
|
||||||
t.Skip("too slow for testing.Short")
|
t.Skip("too slow for testing.Short")
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
run := func(t *testing.T, status serf.MemberStatus, nameFn func(string) string) {
|
dir, s := testServerWithDepsAndConfig(t, enableV2(t),
|
||||||
t.Parallel()
|
func(c *Config) {
|
||||||
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
||||||
c.PrimaryDatacenter = "dc1"
|
c.PrimaryDatacenter = "dc1"
|
||||||
c.ACLsEnabled = true
|
c.ACLsEnabled = true
|
||||||
c.ACLInitialManagementToken = "root"
|
c.ACLInitialManagementToken = "root"
|
||||||
c.ACLResolverSettings.ACLDefaultPolicy = "deny"
|
c.ACLResolverSettings.ACLDefaultPolicy = "deny"
|
||||||
})
|
})
|
||||||
defer os.RemoveAll(dir1)
|
defer os.RemoveAll(dir)
|
||||||
defer s1.Shutdown()
|
defer s.Shutdown()
|
||||||
|
|
||||||
nodeName := s1.config.NodeName
|
testrpc.WaitForRaftLeader(t, s.RPC, "dc1", testrpc.WithToken("root"))
|
||||||
if nameFn != nil {
|
|
||||||
nodeName = nameFn(nodeName)
|
|
||||||
}
|
|
||||||
|
|
||||||
state := s1.fsm.State()
|
client := s.insecureResourceServiceClient
|
||||||
|
|
||||||
// Should be registered
|
consulServiceID := &pbresource.ID{
|
||||||
retry.Run(t, func(r *retry.R) {
|
Name: structs.ConsulServiceName,
|
||||||
_, node, err := state.GetNode(nodeName, nil, "")
|
Type: pbcatalog.ServiceType,
|
||||||
require.NoError(r, err)
|
Tenancy: resource.DefaultNamespacedTenancy(),
|
||||||
require.NotNil(r, node, "server not registered")
|
|
||||||
})
|
|
||||||
|
|
||||||
// Simulate THIS node reaping or leaving
|
|
||||||
mems := s1.LANMembersInAgentPartition()
|
|
||||||
var s1mem serf.Member
|
|
||||||
for _, m := range mems {
|
|
||||||
if strings.EqualFold(m.Name, nodeName) {
|
|
||||||
s1mem = m
|
|
||||||
s1mem.Status = status
|
|
||||||
s1mem.Name = nodeName
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s1.reconcileCh <- s1mem
|
|
||||||
|
|
||||||
// Should NOT be deregistered; we have to poll quickly here because
|
|
||||||
// anti-entropy will put it back if it did get deleted.
|
|
||||||
reaped := false
|
|
||||||
for start := time.Now(); time.Since(start) < 5*time.Second; {
|
|
||||||
_, node, err := state.GetNode(nodeName, nil, "")
|
|
||||||
require.NoError(t, err)
|
|
||||||
if node == nil {
|
|
||||||
reaped = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if reaped {
|
|
||||||
t.Fatalf("server should still be registered")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Run("original name", func(t *testing.T) {
|
|
||||||
t.Parallel()
|
|
||||||
t.Run("left", func(t *testing.T) {
|
|
||||||
run(t, serf.StatusLeft, nil)
|
|
||||||
})
|
|
||||||
t.Run("reap", func(t *testing.T) {
|
|
||||||
run(t, StatusReap, nil)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("uppercased name", func(t *testing.T) {
|
|
||||||
t.Parallel()
|
|
||||||
t.Run("left", func(t *testing.T) {
|
|
||||||
run(t, serf.StatusLeft, strings.ToUpper)
|
|
||||||
})
|
|
||||||
t.Run("reap", func(t *testing.T) {
|
|
||||||
run(t, StatusReap, strings.ToUpper)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
func TestLeader_CheckServersMeta(t *testing.T) {
    if testing.Short() {
        t.Skip("too slow for testing.Short")
    }
    t.Parallel()

    ports := freeport.GetN(t, 2) // s3 grpc, s3 grpc_tls

    dir1, s1 := testServerWithConfig(t, func(c *Config) {
        c.PrimaryDatacenter = "dc1"
        c.ACLsEnabled = true
        c.ACLInitialManagementToken = "root"
        c.ACLResolverSettings.ACLDefaultPolicy = "allow"
        c.Bootstrap = true
    })
    defer os.RemoveAll(dir1)
    defer s1.Shutdown()

    dir2, s2 := testServerWithConfig(t, func(c *Config) {
        c.PrimaryDatacenter = "dc1"
        c.ACLsEnabled = true
        c.ACLInitialManagementToken = "root"
        c.ACLResolverSettings.ACLDefaultPolicy = "allow"
        c.Bootstrap = false
    })
    defer os.RemoveAll(dir2)
    defer s2.Shutdown()

    dir3, s3 := testServerWithConfig(t, func(c *Config) {
        c.PrimaryDatacenter = "dc1"
        c.ACLsEnabled = true
        c.ACLInitialManagementToken = "root"
        c.ACLResolverSettings.ACLDefaultPolicy = "allow"
        c.Bootstrap = false
        c.GRPCPort = ports[0]
        c.GRPCTLSPort = ports[1]
    })
    defer os.RemoveAll(dir3)
    defer s3.Shutdown()

    // Try to join
    joinLAN(t, s1, s2)
    joinLAN(t, s1, s3)

    testrpc.WaitForLeader(t, s1.RPC, "dc1")
    testrpc.WaitForLeader(t, s2.RPC, "dc1")
    testrpc.WaitForLeader(t, s3.RPC, "dc1")
    state := s1.fsm.State()

    consulService := &structs.NodeService{
        ID:      "consul",
        Service: "consul",
    }
    // s3 should be registered
    retry.Run(t, func(r *retry.R) {
        _, service, err := state.NodeService(nil, s3.config.NodeName, "consul", &consulService.EnterpriseMeta, "")
        if err != nil {
            r.Fatalf("err: %v", err)
        }
        if service == nil {
            r.Fatal("client not registered")
        }
        if service.Meta["non_voter"] != "false" {
            r.Fatalf("Expected to be non_voter == false, was: %s", service.Meta["non_voter"])
        }
    })

+   client := s.insecureResourceServiceClient
+
+   consulServiceID := &pbresource.ID{
+       Name:    structs.ConsulServiceName,
+       Type:    pbcatalog.ServiceType,
+       Tenancy: resource.DefaultNamespacedTenancy(),
+   }
+
+   retry.Run(t, func(r *retry.R) {
+       res, err := client.Read(context.Background(), &pbresource.ReadRequest{Id: consulServiceID})
+       if err != nil {
+           r.Fatalf("err: %v", err)
+       }
+       data := res.GetResource().GetData()
+       require.NotNil(r, data)
+
+       var service pbcatalog.Service
+       err = data.UnmarshalTo(&service)
+       require.NoError(r, err)
+
+       // Spot check the Service
+       require.Equal(r, service.GetWorkloads().GetPrefixes(), []string{consulWorkloadPrefix})
+       require.GreaterOrEqual(r, len(service.GetPorts()), 1)
+
+       // Since we're not running a full agent w/ serf, we can't check for valid endpoints
+   })
    member := serf.Member{}
    for _, m := range s1.serfLAN.Members() {
        if m.Name == s3.config.NodeName {
            member = m
            member.Tags = make(map[string]string)
            for key, value := range m.Tags {
                member.Tags[key] = value
            }
        }
    }
    if member.Name != s3.config.NodeName {
        t.Fatal("could not find node in serf members")
    }
    versionToExpect := "19.7.9"

    retry.Run(t, func(r *retry.R) {
        // DEPRECATED - remove nonvoter tag in favor of read_replica in a future version of consul
        member.Tags["nonvoter"] = "1"
        member.Tags["read_replica"] = "1"
        member.Tags["build"] = versionToExpect
        err := s1.handleAliveMember(member, nil)
        if err != nil {
            r.Fatalf("Unexpected error: %v", err)
        }
        _, service, err := state.NodeService(nil, s3.config.NodeName, "consul", &consulService.EnterpriseMeta, "")
        if err != nil {
            r.Fatalf("err: %v", err)
        }
        if service == nil {
            r.Fatal("client not registered")
        }
        // DEPRECATED - remove non_voter in favor of read_replica in a future version of consul
        if service.Meta["non_voter"] != "true" {
            r.Fatalf("Expected to be non_voter == true, was: %s", service.Meta["non_voter"])
        }
        if service.Meta["read_replica"] != "true" {
            r.Fatalf("Expected to be read_replica == true, was: %s", service.Meta["read_replica"])
        }
        newVersion := service.Meta["version"]
        if newVersion != versionToExpect {
            r.Fatalf("Expected version to be updated to %s, was %s", versionToExpect, newVersion)
        }
        grpcPort := service.Meta["grpc_port"]
        if grpcPort != strconv.Itoa(ports[0]) {
            r.Fatalf("Expected grpc port to be %d, was %s", ports[0], grpcPort)
        }
        grpcTLSPort := service.Meta["grpc_tls_port"]
        if grpcTLSPort != strconv.Itoa(ports[1]) {
            r.Fatalf("Expected grpc tls port to be %d, was %s", ports[1], grpcTLSPort)
        }
    })
}
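As an aside, the v2 read path exercised above follows one generic pattern: read a pbresource.Resource by ID, then unmarshal its anypb payload into the concrete catalog type. A minimal sketch of that pattern, using only the calls shown above; readConsulService is a hypothetical helper, not part of this commit:

    // readConsulService is a hypothetical helper illustrating the v2 resource
    // read pattern: fetch the resource by ID, then decode the Any payload into
    // the concrete pbcatalog type.
    func readConsulService(ctx context.Context, client pbresource.ResourceServiceClient, id *pbresource.ID) (*pbcatalog.Service, error) {
        res, err := client.Read(ctx, &pbresource.ReadRequest{Id: id})
        if err != nil {
            return nil, err
        }
        var svc pbcatalog.Service
        if err := res.GetResource().GetData().UnmarshalTo(&svc); err != nil {
            return nil, err
        }
        return &svc, nil
    }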
func TestLeader_ReapServer(t *testing.T) {
    if testing.Short() {
        t.Skip("too slow for testing.Short")
    }

    t.Parallel()
    dir1, s1 := testServerWithConfig(t, func(c *Config) {
        c.PrimaryDatacenter = "dc1"
        c.ACLsEnabled = true
        c.ACLInitialManagementToken = "root"
        c.ACLResolverSettings.ACLDefaultPolicy = "allow"
        c.Bootstrap = true
    })
    defer os.RemoveAll(dir1)
    defer s1.Shutdown()

    dir2, s2 := testServerWithConfig(t, func(c *Config) {
        c.PrimaryDatacenter = "dc1"
        c.ACLsEnabled = true
        c.ACLInitialManagementToken = "root"
        c.ACLResolverSettings.ACLDefaultPolicy = "allow"
        c.Bootstrap = false
    })
    defer os.RemoveAll(dir2)
    defer s2.Shutdown()

    dir3, s3 := testServerWithConfig(t, func(c *Config) {
        c.PrimaryDatacenter = "dc1"
        c.ACLsEnabled = true
        c.ACLInitialManagementToken = "root"
        c.ACLResolverSettings.ACLDefaultPolicy = "allow"
        c.Bootstrap = false
    })
    defer os.RemoveAll(dir3)
    defer s3.Shutdown()

    // Try to join
    joinLAN(t, s1, s2)
    joinLAN(t, s1, s3)

    testrpc.WaitForLeader(t, s1.RPC, "dc1")
    testrpc.WaitForLeader(t, s2.RPC, "dc1")
    testrpc.WaitForLeader(t, s3.RPC, "dc1")
    state := s1.fsm.State()

    // s3 should be registered
    retry.Run(t, func(r *retry.R) {
        _, node, err := state.GetNode(s3.config.NodeName, nil, "")
        if err != nil {
            r.Fatalf("err: %v", err)
        }
        if node == nil {
            r.Fatal("client not registered")
        }
    })

    // call reconcileReaped with a map that does not contain s3
    knownMembers := make(map[string]struct{})
    knownMembers[s1.config.NodeName] = struct{}{}
    knownMembers[s2.config.NodeName] = struct{}{}

    err := s1.reconcileReaped(knownMembers, nil)
    if err != nil {
        t.Fatalf("Unexpected error: %v", err)
    }
    // s3 should be deregistered
    retry.Run(t, func(r *retry.R) {
        _, node, err := state.GetNode(s3.config.NodeName, nil, "")
        if err != nil {
            r.Fatalf("err: %v", err)
        }
        if node != nil {
            r.Fatalf("server with id %v should not be registered", s3.config.NodeID)
        }
    })
}
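The knownMembers set built above mirrors what the leader derives from its serf LAN membership during a real reconcile pass. A minimal sketch of building such a set from a []serf.Member slice; knownMemberNames is a hypothetical helper, not part of this commit:

    // knownMemberNames builds the set of node names currently known via serf,
    // mirroring the manual map construction in TestLeader_ReapServer above.
    func knownMemberNames(members []serf.Member) map[string]struct{} {
        known := make(map[string]struct{}, len(members))
        for _, m := range members {
            known[m.Name] = struct{}{}
        }
        return known
    }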
func TestLeader_Reconcile_ReapMember(t *testing.T) {
    if testing.Short() {
        t.Skip("too slow for testing.Short")
    }

    t.Parallel()
    dir1, s1 := testServerWithConfig(t, func(c *Config) {
        c.PrimaryDatacenter = "dc1"
        c.ACLsEnabled = true
        c.ACLInitialManagementToken = "root"
        c.ACLResolverSettings.ACLDefaultPolicy = "deny"
    })
    defer os.RemoveAll(dir1)
    defer s1.Shutdown()

    testrpc.WaitForLeader(t, s1.RPC, "dc1")

    // Register a non-existing member
    dead := structs.RegisterRequest{
        Datacenter: s1.config.Datacenter,
        Node:       "no-longer-around",
        Address:    "127.1.1.1",
        Check: &structs.HealthCheck{
            Node:    "no-longer-around",
            CheckID: structs.SerfCheckID,
            Name:    structs.SerfCheckName,
            Status:  api.HealthCritical,
        },
        WriteRequest: structs.WriteRequest{
            Token: "root",
        },
    }
    var out struct{}
    if err := s1.RPC(context.Background(), "Catalog.Register", &dead, &out); err != nil {
        t.Fatalf("err: %v", err)
    }

    // Force a reconciliation
    if err := s1.reconcile(); err != nil {
        t.Fatalf("err: %v", err)
    }

    // Node should be gone
    state := s1.fsm.State()
    _, node, err := state.GetNode("no-longer-around", nil, "")
    if err != nil {
        t.Fatalf("err: %v", err)
    }
    if node != nil {
        t.Fatalf("client registered")
    }
}
func TestLeader_Reconcile(t *testing.T) {
    if testing.Short() {
        t.Skip("too slow for testing.Short")
    }

    t.Parallel()
    dir1, s1 := testServerWithConfig(t, func(c *Config) {
        c.PrimaryDatacenter = "dc1"
        c.ACLsEnabled = true
        c.ACLInitialManagementToken = "root"
        c.ACLResolverSettings.ACLDefaultPolicy = "deny"
    })
    defer os.RemoveAll(dir1)
    defer s1.Shutdown()

    dir2, c1 := testClient(t)
    defer os.RemoveAll(dir2)
    defer c1.Shutdown()

    // Join before we have a leader, this should cause a reconcile!
    joinLAN(t, c1, s1)

    // Should not be registered
    state := s1.fsm.State()
    _, node, err := state.GetNode(c1.config.NodeName, nil, "")
    if err != nil {
        t.Fatalf("err: %v", err)
    }
    if node != nil {
        t.Fatalf("client registered")
    }

    // Should be registered
    retry.Run(t, func(r *retry.R) {
        _, node, err := state.GetNode(c1.config.NodeName, nil, "")
        if err != nil {
            r.Fatalf("err: %v", err)
        }
        if node == nil {
            r.Fatal("client not registered")
        }
    })
}
func TestLeader_Reconcile_Races(t *testing.T) {
    if testing.Short() {
        t.Skip("too slow for testing.Short")
    }

    t.Parallel()
    dir1, s1 := testServer(t)
    defer os.RemoveAll(dir1)
    defer s1.Shutdown()

    testrpc.WaitForLeader(t, s1.RPC, "dc1")

    dir2, c1 := testClient(t)
    defer os.RemoveAll(dir2)
    defer c1.Shutdown()

    joinLAN(t, c1, s1)

    // Wait for the server to reconcile the client and register it.
    state := s1.fsm.State()
    var nodeAddr string
    retry.Run(t, func(r *retry.R) {
        _, node, err := state.GetNode(c1.config.NodeName, nil, "")
        if err != nil {
            r.Fatalf("err: %v", err)
        }
        if node == nil {
            r.Fatal("client not registered")
        }
        nodeAddr = node.Address
    })

    // Add in some metadata via the catalog (as if the agent synced it
    // there). We also set the serfHealth check to failing so the reconcile
    // will attempt to flip it back
    req := structs.RegisterRequest{
        Datacenter: s1.config.Datacenter,
        Node:       c1.config.NodeName,
        ID:         c1.config.NodeID,
        Address:    nodeAddr,
        NodeMeta:   map[string]string{"hello": "world"},
        Check: &structs.HealthCheck{
            Node:    c1.config.NodeName,
            CheckID: structs.SerfCheckID,
            Name:    structs.SerfCheckName,
            Status:  api.HealthCritical,
            Output:  "",
        },
    }
    var out struct{}
    if err := s1.RPC(context.Background(), "Catalog.Register", &req, &out); err != nil {
        t.Fatalf("err: %v", err)
    }

    // Force a reconcile and make sure the metadata stuck around.
    if err := s1.reconcile(); err != nil {
        t.Fatalf("err: %v", err)
    }
    _, node, err := state.GetNode(c1.config.NodeName, nil, "")
    if err != nil {
        t.Fatalf("err: %v", err)
    }
    if node == nil {
        t.Fatalf("bad")
    }
    if hello, ok := node.Meta["hello"]; !ok || hello != "world" {
        t.Fatalf("bad")
    }

    // Fail the member and wait for the health to go critical.
    c1.Shutdown()
    retry.Run(t, func(r *retry.R) {
        _, checks, err := state.NodeChecks(nil, c1.config.NodeName, nil, "")
        if err != nil {
            r.Fatalf("err: %v", err)
        }
        if len(checks) != 1 {
            r.Fatalf("client missing check")
        }
        if got, want := checks[0].Status, api.HealthCritical; got != want {
            r.Fatalf("got state %q want %q", got, want)
        }
    })

    // Make sure the metadata didn't get clobbered.
    _, node, err = state.GetNode(c1.config.NodeName, nil, "")
    if err != nil {
        t.Fatalf("err: %v", err)
    }
    if node == nil {
        t.Fatalf("bad")
    }
    if hello, ok := node.Meta["hello"]; !ok || hello != "world" {
        t.Fatalf("bad")
    }
}
func TestLeader_LeftServer(t *testing.T) {
    if testing.Short() {
        t.Skip("too slow for testing.Short")
    }

    t.Parallel()
    dir1, s1 := testServer(t)
    defer os.RemoveAll(dir1)
    defer s1.Shutdown()

    dir2, s2 := testServerDCBootstrap(t, "dc1", false)
    defer os.RemoveAll(dir2)
    defer s2.Shutdown()

    dir3, s3 := testServerDCBootstrap(t, "dc1", false)
    defer os.RemoveAll(dir3)
    defer s3.Shutdown()

    // Put s1 last so we don't trigger a leader election.
    servers := []*Server{s2, s3, s1}

    // Try to join
    joinLAN(t, s2, s1)
    joinLAN(t, s3, s1)
    for _, s := range servers {
        retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
    }

    // Kill any server
    servers[0].Shutdown()

    // Force remove the non-leader (transition to left state)
    if err := servers[1].RemoveFailedNode(servers[0].config.NodeName, false, nil); err != nil {
        t.Fatalf("err: %v", err)
    }

    // Wait until the remaining servers show only 2 peers.
    for _, s := range servers[1:] {
        retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 2)) })
    }
    s1.Shutdown()
}
func TestLeader_LeftLeader(t *testing.T) {
    if testing.Short() {
        t.Skip("too slow for testing.Short")
    }

    t.Parallel()
    dir1, s1 := testServer(t)
    defer os.RemoveAll(dir1)
    defer s1.Shutdown()

    dir2, s2 := testServerDCBootstrap(t, "dc1", false)
    defer os.RemoveAll(dir2)
    defer s2.Shutdown()

    dir3, s3 := testServerDCBootstrap(t, "dc1", false)
    defer os.RemoveAll(dir3)
    defer s3.Shutdown()
    servers := []*Server{s1, s2, s3}

    // Try to join
    joinLAN(t, s2, s1)
    joinLAN(t, s3, s1)

    for _, s := range servers {
        retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
    }

    // Kill the leader!
    var leader *Server
    for _, s := range servers {
        if s.IsLeader() {
            leader = s
            break
        }
    }
    if leader == nil {
        t.Fatalf("Should have a leader")
    }
    if !leader.isReadyForConsistentReads() {
        t.Fatalf("Expected leader to be ready for consistent reads")
    }
    leader.Leave()
    if leader.isReadyForConsistentReads() {
        t.Fatalf("Expected consistent read state to be false")
    }
    leader.Shutdown()
    time.Sleep(100 * time.Millisecond)

    var remain *Server
    for _, s := range servers {
        if s == leader {
            continue
        }
        remain = s
        retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 2)) })
    }

    // Verify the old leader is deregistered
    state := remain.fsm.State()
    retry.Run(t, func(r *retry.R) {
        _, node, err := state.GetNode(leader.config.NodeName, nil, "")
        if err != nil {
            r.Fatalf("err: %v", err)
        }
        if node != nil {
            r.Fatal("leader should be deregistered")
        }
    })
}
func TestLeader_MultiBootstrap(t *testing.T) {
    t.Parallel()
    dir1, s1 := testServer(t)
    defer os.RemoveAll(dir1)
    defer s1.Shutdown()

    dir2, s2 := testServer(t)
    defer os.RemoveAll(dir2)
    defer s2.Shutdown()

    servers := []*Server{s1, s2}

    // Try to join
    joinLAN(t, s2, s1)

    for _, s := range servers {
        retry.Run(t, func(r *retry.R) {
            if got, want := len(s.serfLAN.Members()), 2; got != want {
                r.Fatalf("got %d peers want %d", got, want)
            }
        })
    }

    // Ensure we don't have multiple raft peers
    for _, s := range servers {
        peers, _ := s.autopilot.NumVoters()
        if peers != 1 {
            t.Fatalf("should only have 1 raft peer!")
        }
    }
}
func TestLeader_TombstoneGC_Reset(t *testing.T) {
@@ -414,6 +414,9 @@ type Server struct {
    // Manager to handle starting/stopping go routines when establishing/revoking raft leadership
    leaderRoutineManager *routine.Manager

+   // registrator is an implementation that translates serf events of Consul servers into catalog events
+   registrator ConsulRegistrator
+
    // publisher is the EventPublisher to be shared amongst various server components. Events from
    // modifications to the FSM, autopilot and others will flow through here. If in the future we
    // need Events generated outside of the Server and all its components, then we could move
@@ -883,6 +886,24 @@ func NewServer(config *Config, flat Deps, externalGRPCServer *grpc.Server,
    // as establishing leadership could attempt to use autopilot and cause a panic.
    s.initAutopilot(config)

+   // Construct the registrator that makes sense for the catalog version
+   if s.useV2Resources {
+       s.registrator = V2ConsulRegistrator{
+           Logger:   serverLogger,
+           NodeName: s.config.NodeName,
+           EntMeta:  s.config.AgentEnterpriseMeta(),
+           Client:   s.insecureResourceServiceClient,
+       }
+   } else {
+       s.registrator = V1ConsulRegistrator{
+           Datacenter:    s.config.Datacenter,
+           FSM:           s.fsm,
+           Logger:        serverLogger,
+           NodeName:      s.config.NodeName,
+           RaftApplyFunc: s.raftApplyMsgpack,
+       }
+   }
+
    // Start monitoring leadership. This must happen after Serf is set up
    // since it can fire events when leadership is obtained.
    go s.monitorLeadership()
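A cheap guard that would fit alongside this change is a pair of compile-time assertions that both registrators satisfy ConsulRegistrator. A sketch only, not part of the commit; it assumes both types implement the interface with value receivers, as their construction by value above suggests:

    // Hypothetical compile-time checks: these fail to build if either
    // registrator stops satisfying ConsulRegistrator.
    var (
        _ ConsulRegistrator = V1ConsulRegistrator{}
        _ ConsulRegistrator = V2ConsulRegistrator{}
    )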
@@ -230,6 +230,12 @@ func testServerDCExpect(t *testing.T, dc string, expect int) (string, *Server) {
 }

 func testServerWithConfig(t *testing.T, configOpts ...func(*Config)) (string, *Server) {
+   return testServerWithDepsAndConfig(t, nil, configOpts...)
+}
+
+// testServerWithDepsAndConfig is similar to testServerWithConfig except that it also allows modifying dependencies.
+// This is useful for things like injecting experiment flags.
+func testServerWithDepsAndConfig(t *testing.T, depOpts func(*Deps), configOpts ...func(*Config)) (string, *Server) {
    var dir string
    var srv *Server

@@ -251,6 +257,11 @@ func testServerWithConfig(t *testing.T, configOpts ...func(*Config)) (string, *Server) {
    var err error
    deps = newDefaultDeps(r, config)

+   if depOpts != nil {
+       depOpts(&deps)
+   }
+
    srv, err = newServerWithDeps(r, config, deps)
    if err != nil {
        r.Fatalf("err: %v", err)
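A likely use of the new depOpts hook is flipping on the v2 catalog experiment in a test before the server is constructed. A hedged sketch; the Experiments field on Deps and the "resource-apis" flag name are assumptions, not confirmed by this diff:

    // Hypothetical usage: inject an experiment flag via depOpts.
    // Deps.Experiments and the "resource-apis" value are assumed names.
    dir, srv := testServerWithDepsAndConfig(t,
        func(deps *Deps) {
            deps.Experiments = []string{"resource-apis"}
        },
        func(c *Config) {
            c.Bootstrap = true
        },
    )
    defer os.RemoveAll(dir)
    defer srv.Shutdown()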
@@ -47,6 +47,37 @@ func WaitForLeader(t *testing.T, rpc rpcFn, dc string, options ...waitOption) {
    })
 }

+// WaitForRaftLeader is a V2-compatible version of WaitForLeader.
+// Unlike WaitForLeader, it requires a token with operator:read access.
+func WaitForRaftLeader(t *testing.T, rpc rpcFn, dc string, options ...waitOption) {
+   t.Helper()
+
+   flat := flattenOptions(options)
+   if flat.WaitForAntiEntropySync {
+       t.Fatalf("WaitForRaftLeader doesn't accept the WaitForAntiEntropySync option")
+   }
+
+   var out structs.RaftConfigurationResponse
+   retry.Run(t, func(r *retry.R) {
+       args := &structs.DCSpecificRequest{
+           Datacenter:   dc,
+           QueryOptions: structs.QueryOptions{Token: flat.Token},
+       }
+       if err := rpc(context.Background(), "Operator.RaftGetConfiguration", args, &out); err != nil {
+           r.Fatalf("Operator.RaftGetConfiguration failed: %v", err)
+       }
+       // Don't check the Raft index. With other things going on in V2, the
+       // assumption that the index is >= 2 is no longer valid.
+
+       for _, server := range out.Servers {
+           if server.Leader {
+               return
+           }
+       }
+       r.Fatalf("No leader")
+   })
+}
+
 // WaitUntilNoLeader ensures no leader is present, useful for testing lost leadership.
 func WaitUntilNoLeader(t *testing.T, rpc rpcFn, dc string, options ...waitOption) {
    t.Helper()
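Callers would use WaitForRaftLeader like WaitForLeader, passing the required token through the existing wait options. A minimal sketch, assuming a WithToken option constructor exists alongside flattenOptions:

    // Hypothetical call site: wait for a Raft leader using a token with
    // operator:read access. WithToken is assumed to be an existing waitOption.
    testrpc.WaitForRaftLeader(t, s1.RPC, "dc1", testrpc.WithToken("root"))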