mirror of
https://github.com/status-im/consul.git
synced 2025-01-10 05:45:46 +00:00
1b4218a068
The test had two racy bugs related to memdb references.

The first was when we initially populated data and retained the FederationState objects in a slice. Due to how the `inmemCodec` works, these were actually the identical objects passed into memdb.

The second was that the `checkSame` assertion function was reading from memdb and setting the RaftIndexes to zeros to aid in equality checks. This was mutating the contents of memdb, which is a no-no.

With this fix, the command:

```
i=0; while /usr/local/bin/go test -count=1 -timeout 30s github.com/hashicorp/consul/agent/consul -run '^(TestReplication_FederationStates)$'; do i=$((i + 1)); printf "$i "; done
```

which used to break on my machine in fewer than 20 runs, is now running 150+ times without any issue.

Might also fix #7575
151 lines
4.2 KiB
Go
151 lines
4.2 KiB
Go
package consul
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/hashicorp/consul/agent/structs"
|
|
"github.com/hashicorp/consul/api"
|
|
"github.com/hashicorp/consul/sdk/testutil/retry"
|
|
"github.com/hashicorp/consul/testrpc"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestReplication_FederationStates(t *testing.T) {
|
|
t.Parallel()
|
|
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
c.PrimaryDatacenter = "dc1"
|
|
c.DisableFederationStateAntiEntropy = true
|
|
})
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
testrpc.WaitForLeader(t, s1.RPC, "dc1")
|
|
client := rpcClient(t, s1)
|
|
defer client.Close()
|
|
|
|
dir2, s2 := testServerWithConfig(t, func(c *Config) {
|
|
c.Datacenter = "dc2"
|
|
c.PrimaryDatacenter = "dc1"
|
|
c.FederationStateReplicationRate = 100
|
|
c.FederationStateReplicationBurst = 100
|
|
c.FederationStateReplicationApplyLimit = 1000000
|
|
c.DisableFederationStateAntiEntropy = true
|
|
})
|
|
testrpc.WaitForLeader(t, s2.RPC, "dc2")
|
|
defer os.RemoveAll(dir2)
|
|
defer s2.Shutdown()
|
|
|
|
// Try to join.
|
|
joinWAN(t, s2, s1)
|
|
testrpc.WaitForLeader(t, s1.RPC, "dc1")
|
|
testrpc.WaitForLeader(t, s1.RPC, "dc2")
|
|
|
|
// Create some new federation states (weird because we're having dc1 update it for the other 50)
|
|
var fedStateDCs []string
|
|
for i := 0; i < 50; i++ {
|
|
dc := fmt.Sprintf("alt-dc%d", i+1)
|
|
ip1 := fmt.Sprintf("1.2.3.%d", i+1)
|
|
ip2 := fmt.Sprintf("4.3.2.%d", i+1)
|
|
arg := structs.FederationStateRequest{
|
|
Datacenter: "dc1",
|
|
Op: structs.FederationStateUpsert,
|
|
State: &structs.FederationState{
|
|
Datacenter: dc,
|
|
MeshGateways: []structs.CheckServiceNode{
|
|
newTestMeshGatewayNode(
|
|
dc, "gateway1", ip1, 443, map[string]string{structs.MetaWANFederationKey: "1"}, api.HealthPassing,
|
|
),
|
|
newTestMeshGatewayNode(
|
|
dc, "gateway2", ip2, 443, map[string]string{structs.MetaWANFederationKey: "1"}, api.HealthPassing,
|
|
),
|
|
},
|
|
UpdatedAt: time.Now().UTC(),
|
|
},
|
|
}
|
|
|
|
out := false
|
|
require.NoError(t, s1.RPC("FederationState.Apply", &arg, &out))
|
|
fedStateDCs = append(fedStateDCs, dc)
|
|
}
|
|
|
|
checkSame := func(t *retry.R) error {
|
|
_, remote, err := s1.fsm.State().FederationStateList(nil)
|
|
require.NoError(t, err)
|
|
_, local, err := s2.fsm.State().FederationStateList(nil)
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, local, len(remote))
|
|
for i := range remote {
|
|
// Make lightweight copies so we can zero out the raft fields
|
|
// without mutating the copies in memdb.
|
|
remoteCopy := *remote[i]
|
|
localCopy := *local[i]
|
|
// zero out the raft data for future comparisons
|
|
remoteCopy.RaftIndex = structs.RaftIndex{}
|
|
localCopy.RaftIndex = structs.RaftIndex{}
|
|
require.Equal(t, remoteCopy, localCopy)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Wait for the replica to converge.
|
|
retry.Run(t, func(r *retry.R) {
|
|
checkSame(r)
|
|
})
|
|
|
|
// Update those states
|
|
for i := 0; i < 50; i++ {
|
|
dc := fmt.Sprintf("alt-dc%d", i+1)
|
|
ip1 := fmt.Sprintf("1.2.3.%d", i+1)
|
|
ip2 := fmt.Sprintf("4.3.2.%d", i+1)
|
|
ip3 := fmt.Sprintf("5.8.9.%d", i+1)
|
|
arg := structs.FederationStateRequest{
|
|
Datacenter: "dc1",
|
|
Op: structs.FederationStateUpsert,
|
|
State: &structs.FederationState{
|
|
Datacenter: dc,
|
|
MeshGateways: []structs.CheckServiceNode{
|
|
newTestMeshGatewayNode(
|
|
dc, "gateway1", ip1, 8443, map[string]string{structs.MetaWANFederationKey: "1"}, api.HealthPassing,
|
|
),
|
|
newTestMeshGatewayNode(
|
|
dc, "gateway2", ip2, 8443, map[string]string{structs.MetaWANFederationKey: "1"}, api.HealthPassing,
|
|
),
|
|
newTestMeshGatewayNode(
|
|
dc, "gateway3", ip3, 8443, map[string]string{structs.MetaWANFederationKey: "1"}, api.HealthPassing,
|
|
),
|
|
},
|
|
UpdatedAt: time.Now().UTC(),
|
|
},
|
|
}
|
|
|
|
out := false
|
|
require.NoError(t, s1.RPC("FederationState.Apply", &arg, &out))
|
|
}
|
|
|
|
// Wait for the replica to converge.
|
|
retry.Run(t, func(r *retry.R) {
|
|
checkSame(r)
|
|
})
|
|
|
|
for _, fedStateDC := range fedStateDCs {
|
|
arg := structs.FederationStateRequest{
|
|
Datacenter: "dc1",
|
|
Op: structs.FederationStateDelete,
|
|
State: &structs.FederationState{
|
|
Datacenter: fedStateDC,
|
|
},
|
|
}
|
|
|
|
out := false
|
|
require.NoError(t, s1.RPC("FederationState.Apply", &arg, &out))
|
|
}
|
|
|
|
// Wait for the replica to converge.
|
|
retry.Run(t, func(r *retry.R) {
|
|
checkSame(r)
|
|
})
|
|
}
|