consul/test-integ/peering_commontopo/ac6_failovers_test.go

467 lines
12 KiB
Go
Raw Normal View History

[COMPLIANCE] License changes (#18443) * Adding explicit MPL license for sub-package This directory and its subdirectories (packages) contain files licensed with the MPLv2 `LICENSE` file in this directory and are intentionally licensed separately from the BSL `LICENSE` file at the root of this repository. * Adding explicit MPL license for sub-package This directory and its subdirectories (packages) contain files licensed with the MPLv2 `LICENSE` file in this directory and are intentionally licensed separately from the BSL `LICENSE` file at the root of this repository. * Updating the license from MPL to Business Source License Going forward, this project will be licensed under the Business Source License v1.1. Please see our blog post for more details at <Blog URL>, FAQ at www.hashicorp.com/licensing-faq, and details of the license at www.hashicorp.com/bsl. * add missing license headers * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 * Update copyright file headers to BUSL-1.1 --------- Co-authored-by: hashicorp-copywrite[bot] <110428419+hashicorp-copywrite[bot]@users.noreply.github.com>
2023-08-11 09:12:13 -04:00
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package peering
import (
"fmt"
"testing"
"github.com/hashicorp/consul/testing/deployer/topology"
"github.com/stretchr/testify/require"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/test/integration/consul-container/libs/utils"
)
// ac6FailoversSuite describes a single failover scenario: a client whose
// upstream is a "near" server, with a failover target on a "far" server.
//
// The exported fields are inputs chosen by the test functions; the unexported
// fields are filled in by setup and read back by test.
//
// With every input left false, the scenario is:
//   - a "near" server in the accepter cluster (DC1), partition default, namespace default
//   - a "far" server in the dialer cluster (DC2), partition default, namespace default
//   - a client in the accepter cluster (DC1), partition default, namespace default, with:
//   - upstream near server (DC1)
//   - failover to far server (DC2)
//
// TODO: technically if NearInDial && !FarInAcc (i.e., near == far), then we're not doing peering at all,
// and could do this test in a single DC
type ac6FailoversSuite struct {
	// NearInDial places the client (and its default upstream server) in the
	// dialer peer when true; otherwise they go in the accepter.
	NearInDial bool
	// NearInPartAlt / NearInNSAlt place the client (and its default upstream
	// server) in the nondefault partition / namespace when true; otherwise
	// the default one is used.
	NearInPartAlt, NearInNSAlt bool
	// FarInAcc places the far server in the accepter peer when true;
	// otherwise it goes in the dialer.
	FarInAcc bool
	// FarInPartAlt / FarInNSAlt place the far server in the nondefault
	// partition / namespace (ENT-only) when true; otherwise failover goes to
	// the default one.
	FarInPartAlt, FarInNSAlt bool

	// Launch outputs, recorded by setup so they can be queried during test.

	// clientSID identifies the client service.
	clientSID topology.ServiceID
	// nearServerSID identifies the server the client reaches before failover.
	nearServerSID topology.ServiceID
	// nearServerNode is the node that gets disabled to trigger failover.
	nearServerNode topology.NodeID
	// farServerSID identifies the failover target server.
	farServerSID topology.ServiceID
	// farServerNode is the node hosting the failover target server.
	farServerNode topology.NodeID
}
// TestAC6Failovers runs the failover scenario for every combination of
// NearInDial, NearInPartAlt and FarInPartAlt. FarInAcc is derived from
// NearInDial so that the two servers always land in different clusters.
//
// Note: this test cannot share topo
func TestAC6Failovers(t *testing.T) {
	// Each low bit of the counter drives one boolean parameter, so counting
	// up to 2**paramCount visits every permutation exactly once.
	const paramCount = 3
	const total = int(1) << paramCount

	for idx := 0; idx < total; idx++ {
		tc := ac6FailoversSuite{
			NearInDial:    idx&(1<<0) != 0,
			NearInPartAlt: idx&(1<<1) != 0,
			FarInPartAlt:  idx&(1<<2) != 0,
		}
		// ensure the servers are always in separate DCs
		tc.FarInAcc = tc.NearInDial

		subtest := fmt.Sprintf("%02d_%s", idx, tc.testName())
		t.Run(subtest, func(t *testing.T) {
			t.Parallel()
			topo := NewCommonTopo(t)
			tc.setup(t, topo)
			topo.Launch(t)
			tc.test(t, topo)
		})
	}
}
// TestNET5029Failovers runs a fixed, named table of failover scenarios. Each
// entry becomes its own parallel subtest with a freshly built topology.
func TestNET5029Failovers(t *testing.T) {
	// Case that is currently skipped; see the t.Skip message below.
	const knownBroken = "1.b"

	// TODO: *.{a,b} are not actually peering tests, and should technically be moved elsewhere
	cases := map[string]ac6FailoversSuite{
		"1.a": {FarInAcc: true, FarInPartAlt: true},
		"1.b": {FarInAcc: true, FarInNSAlt: true},
		"1.c": {FarInNSAlt: true},
		"1.d": {FarInPartAlt: true},
		"2.a": {FarInAcc: true, NearInPartAlt: true},
		"2.b": {FarInAcc: true, NearInNSAlt: true},
		"2.c": {NearInDial: true, NearInNSAlt: true, FarInAcc: true},
		"2.d": {NearInDial: true, NearInPartAlt: true, FarInAcc: true},
	}

	for name, tc := range cases {
		// Pin both loop variables: the subtest body keeps using them after
		// it has been handed off to run in parallel.
		name, tc := name, tc
		subtest := fmt.Sprintf("%s_%s", name, tc.testName())
		t.Run(subtest, func(t *testing.T) {
			if name == knownBroken {
				t.Skip("TODO: fails with 503/504")
			}
			t.Parallel()
			topo := NewCommonTopo(t)
			tc.setup(t, topo)
			topo.Launch(t)
			tc.test(t, topo)
		})
	}
}
// TestAC6Failovers_AllPermutations enumerates every combination of all six
// suite inputs. It is skipped unconditionally; the loop below is kept so the
// full matrix can be re-enabled by removing the Skip.
func TestAC6Failovers_AllPermutations(t *testing.T) {
	// Skip stops this test before any subtest is registered.
	t.Skip("Too many permutations")

	// Each low bit of the counter drives one boolean parameter, so counting
	// up to 2**paramCount visits every permutation exactly once.
	const paramCount = 6
	const total = int(1) << paramCount

	for idx := 0; idx < total; idx++ {
		tc := ac6FailoversSuite{
			NearInDial:    idx&(1<<0) != 0,
			FarInAcc:      idx&(1<<1) != 0,
			NearInPartAlt: idx&(1<<2) != 0,
			FarInPartAlt:  idx&(1<<3) != 0,
			NearInNSAlt:   idx&(1<<4) != 0,
			FarInNSAlt:    idx&(1<<5) != 0,
		}

		subtest := fmt.Sprintf("%02d_%s", idx, tc.testName())
		t.Run(subtest, func(t *testing.T) {
			t.Parallel()
			topo := NewCommonTopo(t)
			tc.setup(t, topo)
			topo.Launch(t)
			tc.test(t, topo)
		})
	}
}
// testName renders the six input flags as a readable subtest label of the
// form "<peer>.<partition>.<namespace>-><peer>.<partition>.<namespace>",
// where the left side describes the near (client) placement and the right
// side the far (failover) placement. Peers are "acc" or "dial"; partitions
// and namespaces are "default" or "alt".
func (s *ac6FailoversSuite) testName() (ret string) {
	// choose returns whenTrue if flag is set and whenFalse otherwise.
	choose := func(flag bool, whenTrue, whenFalse string) string {
		if flag {
			return whenTrue
		}
		return whenFalse
	}

	// Note the peer labels are mirrored: the near side defaults to the
	// accepter, while the far side defaults to the dialer.
	ret = choose(s.NearInDial, "dial", "acc") + "." +
		choose(s.NearInPartAlt, "alt", "default") + "." +
		choose(s.NearInNSAlt, "alt", "default") + "->" +
		choose(s.FarInAcc, "acc", "dial") + "." +
		choose(s.FarInPartAlt, "alt", "default") + "." +
		choose(s.FarInNSAlt, "alt", "default")
	return ret
}
// setup registers everything this scenario needs on ct before the topology is
// launched, and records the resulting IDs on s for use by test.
//
// It adds, in order:
//   - the near server node, plus its service-defaults (protocol http)
//   - the far server node, exported to the near side when the two live in
//     different clusters or different partitions
//   - in the near cluster: service-defaults for the far server's name and a
//     service-resolver on the near server whose "*" failover points at the
//     far server
//   - the client node, with one upstream to the near server on local port
//     5000, plus its service-defaults
//   - intentions allowing the client to reach the near server (near cluster)
//     and the far server (far cluster)
//
// The test is skipped when it asks for nondefault partitions or namespaces
// and the binary under test is not Enterprise.
func (s *ac6FailoversSuite) setup(t *testing.T, ct *commonTopo) {
	if !utils.IsEnterprise() {
		if s.NearInPartAlt || s.FarInPartAlt {
			t.Skip("ENT required for nondefault partitions")
		}
		// Nondefault namespaces are ENT-only just like nondefault
		// partitions, so those scenarios cannot be set up on CE either.
		if s.NearInNSAlt || s.FarInNSAlt {
			t.Skip("ENT required for nondefault namespaces")
		}
	}

	// Defaults: near side in DC1, far side in DC2; either flag flips its side.
	nearClu := ct.DC1
	farClu := ct.DC2
	if s.NearInDial {
		nearClu = ct.DC2
	}
	if s.FarInAcc {
		farClu = ct.DC1
	}

	// - server in clientPartition/DC (main target)
	nearServerSID := topology.ServiceID{
		Name:      "ac6-server",
		Partition: ConfigEntryPartition("default"),
		Namespace: "default",
	}
	if s.NearInPartAlt {
		nearServerSID.Partition = "part1"
	}
	if s.NearInNSAlt {
		nearServerSID.Namespace = "ns1"
	}
	nearServer := NewFortioServiceWithDefaults(
		nearClu.Datacenter,
		nearServerSID,
		nil,
	)
	nearServerNode := ct.AddServiceNode(nearClu, serviceExt{Service: nearServer})

	nearClu.InitialConfigEntries = append(nearClu.InitialConfigEntries,
		&api.ServiceConfigEntry{
			Kind:      api.ServiceDefaults,
			Name:      nearServerSID.Name,
			Partition: ConfigEntryPartition(nearServerSID.Partition),
			Namespace: nearServerSID.Namespace,
			Protocol:  "http",
		},
	)

	// - server in otherPartition/otherDC
	// It deliberately shares the near server's service name.
	farServerSID := topology.ServiceID{
		Name:      nearServerSID.Name,
		Partition: "default",
		Namespace: "default",
	}
	if s.FarInPartAlt {
		farServerSID.Partition = "part1"
	}
	if s.FarInNSAlt {
		farServerSID.Namespace = "ns1"
	}
	farServer := NewFortioServiceWithDefaults(
		farClu.Datacenter,
		farServerSID,
		nil,
	)
	farServerNode := ct.AddServiceNode(farClu, serviceExt{Service: farServer})

	// Export the far server to wherever the client lives: to the near
	// cluster's peer when the clusters differ, or to the near partition when
	// both servers share a cluster but not a partition. Same cluster and same
	// partition needs no export.
	if nearClu != farClu {
		ct.ExportService(farClu, farServerSID.Partition,
			api.ExportedService{
				Name:      farServerSID.Name,
				Namespace: farServerSID.Namespace,
				Consumers: []api.ServiceConsumer{
					{
						Peer: LocalPeerName(nearClu, nearServerSID.Partition),
					},
				},
			},
		)
	} else if farServerSID.Partition != nearServerSID.Partition {
		ct.ExportService(farClu, farServerSID.Partition,
			api.ExportedService{
				Name:      farServerSID.Name,
				Namespace: farServerSID.Namespace,
				Consumers: []api.ServiceConsumer{
					{
						// this must not be "", or else it is basically ignored altogether
						// TODO: bug? if this whole struct is empty, that should be an error
						Partition: topology.PartitionOrDefault(nearServerSID.Partition),
					},
				},
			},
		)
	}

	// Failover target: addressed by peer across clusters, by partition within
	// a single cluster.
	var targets []api.ServiceResolverFailoverTarget
	if nearClu != farClu {
		targets = []api.ServiceResolverFailoverTarget{
			{
				Service:   farServerSID.Name,
				Peer:      LocalPeerName(farClu, farServerSID.Partition),
				Namespace: farServerSID.Namespace,
			},
		}
	} else {
		part := ConfigEntryPartition(farServerSID.Partition)
		// weird exception here where target partition set to "" means "inherit from parent"
		// TODO: bug? docs say "" -> default:
		// https://developer.hashicorp.com/consul/docs/connect/config-entries/service-resolver#failover-targets-partition
		if farServerSID.Partition == "default" && nearServerSID.Partition != "default" {
			part = "default"
		}
		targets = []api.ServiceResolverFailoverTarget{
			{
				Service:   farServerSID.Name,
				Partition: part,
				Namespace: farServerSID.Namespace,
			},
		}
	}

	nearClu.InitialConfigEntries = append(nearClu.InitialConfigEntries,
		&api.ServiceConfigEntry{
			Kind:      api.ServiceDefaults,
			Name:      farServerSID.Name,
			Partition: ConfigEntryPartition(farServerSID.Partition),
			Namespace: farServerSID.Namespace,
			Protocol:  "http",
		},
		&api.ServiceResolverConfigEntry{
			Kind:      api.ServiceResolver,
			Name:      nearServerSID.Name,
			Partition: ConfigEntryPartition(nearServerSID.Partition),
			Namespace: nearServerSID.Namespace,
			Failover: map[string]api.ServiceResolverFailover{
				"*": {
					Targets: targets,
				},
			},
		},
	)

	// The client always shares the near server's partition and namespace.
	clientSID := topology.ServiceID{
		Name:      "ac6-client",
		Partition: nearServerSID.Partition,
		Namespace: nearServerSID.Namespace,
	}
	client := NewFortioServiceWithDefaults(
		nearClu.Datacenter,
		clientSID,
		func(svc *topology.Service) {
			// Upstream per partition
			svc.Upstreams = []*topology.Upstream{
				{
					ID: topology.ServiceID{
						Name:      nearServerSID.Name,
						Partition: nearServerSID.Partition,
						Namespace: nearServerSID.Namespace,
					},
					LocalPort: 5000,
					// exposed so we can hit it directly
					// TODO: we shouldn't do this; it's not realistic
					LocalAddress: "0.0.0.0",
				},
			}
		},
	)
	ct.AddServiceNode(nearClu, serviceExt{Service: client})

	nearClu.InitialConfigEntries = append(nearClu.InitialConfigEntries,
		&api.ServiceConfigEntry{
			Kind:      api.ServiceDefaults,
			Name:      clientSID.Name,
			Partition: ConfigEntryPartition(clientSID.Partition),
			Namespace: clientSID.Namespace,
			Protocol:  "http",
		},
	)

	// intentions
	nearClu.InitialConfigEntries = append(nearClu.InitialConfigEntries,
		&api.ServiceIntentionsConfigEntry{
			Kind:      api.ServiceIntentions,
			Name:      nearServerSID.Name,
			Partition: ConfigEntryPartition(nearServerSID.Partition),
			Namespace: nearServerSID.Namespace,
			Sources: []*api.SourceIntention{{
				Name:      clientSID.Name,
				Namespace: clientSID.Namespace,
				// in this field, "" -> destination partition, so no ConfigEntryPartition :eyeroll:
				// https://developer.hashicorp.com/consul/docs/connect/config-entries/service-intentions#sources-partition
				Partition: topology.PartitionOrDefault(clientSID.Partition),
				Action:    api.IntentionActionAllow,
			}},
		},
	)

	// The far server's intention names the client by peer when the clusters
	// differ, and by partition when they are the same cluster.
	farSource := api.SourceIntention{
		Name:      clientSID.Name,
		Namespace: clientSID.Namespace,
		Peer:      LocalPeerName(nearClu, clientSID.Partition),
		Action:    api.IntentionActionAllow,
	}
	if nearClu == farClu {
		farSource.Peer = ""
		// in this field, "" -> destination partition, so no ConfigEntryPartition :eyeroll:
		// https://developer.hashicorp.com/consul/docs/connect/config-entries/service-intentions#sources-partition
		farSource.Partition = topology.PartitionOrDefault(clientSID.Partition)
	}
	farClu.InitialConfigEntries = append(farClu.InitialConfigEntries,
		&api.ServiceIntentionsConfigEntry{
			Kind:      api.ServiceIntentions,
			Name:      farServerSID.Name,
			Partition: ConfigEntryPartition(farServerSID.Partition),
			Namespace: farServerSID.Namespace,
			Sources:   []*api.SourceIntention{&farSource},
		},
	)

	// Record outputs for the test phase.
	s.clientSID = clientSID
	s.nearServerSID = nearServerSID
	s.farServerSID = farServerSID
	s.nearServerNode = nearServerNode.ID()
	s.farServerNode = farServerNode.ID()
}
// test exercises the launched scenario in two phases. First it checks that
// the client's single upstream reaches the near server and that the upstream
// service is present in the near cluster's catalog. Then it disables the near
// server's node, relaunches with the "failover" phase, and checks that the
// same upstream now reaches the far server.
func (s *ac6FailoversSuite) test(t *testing.T, ct *commonTopo) {
	// NOTE: *not parallel* because we mutate resources that are shared
	// between test cases (disable/enable nodes)

	// Pick the cluster names first (same defaults and overrides as setup),
	// then look them up in the launched topology.
	nearName, farName := "dc1", "dc2"
	if s.NearInDial {
		nearName = "dc2"
	}
	if s.FarInAcc {
		farName = "dc1"
	}
	nearClu := ct.Sprawl.Topology().Clusters[nearName]
	farClu := ct.Sprawl.Topology().Clusters[farName]

	clients := nearClu.ServicesByID(s.clientSID)
	require.Len(t, clients, 1, "expected exactly one client in datacenter")
	client := clients[0]
	require.Len(t, client.Upstreams, 1, "expected one upstream for client")
	upstream := client.Upstreams[0]

	fmt.Println("### preconditions")
	// this is the server in the same DC and partitions as client
	// (normalize a copy so the suite's stored ID is left untouched)
	nearSID := s.nearServerSID
	nearSID.Normalize()
	ct.Assert.FortioFetch2FortioName(t, client, upstream, nearClu.Name, nearSID)

	catalogOpts := utils.CompatQueryOpts(&api.QueryOptions{
		Partition: upstream.ID.Partition,
		Namespace: upstream.ID.Namespace,
	})
	ct.Assert.CatalogServiceExists(t, nearClu.Name, upstream.ID.Name, catalogOpts)

	// Stop here if any precondition assertion has marked the test failed.
	if t.Failed() {
		t.Fatal("failed preconditions")
	}

	fmt.Println("### failover")
	cfg := ct.Sprawl.Config()
	DisableNode(t, cfg, nearClu.Name, s.nearServerNode)
	require.NoError(t, ct.Sprawl.RelaunchWithPhase(cfg, "failover"))

	// Clusters for imported services rely on outlier detection for
	// failovers, NOT eds_health_status. This means that killing the
	// node above does not actually make the envoy cluster UNHEALTHY
	// so we do not assert for it.
	farSID := s.farServerSID
	farSID.Normalize()
	ct.Assert.FortioFetch2FortioName(t, client, upstream, farClu.Name, farSID)
}