mirror of
https://github.com/status-im/consul.git
synced 2025-01-24 20:51:10 +00:00
c029b20615
The new controller caches are initialized before the DependencyMappers or the Reconciler run, but importantly they are not populated. The expectation is that when the WatchList call is made to the resource service it will send an initial snapshot of all resources matching a single type, and then perpetually send UPSERT/DELETE events afterward. This initial snapshot will cycle through the caching layer and will catch it up to reflect the stored data. Critically, the dependency mappers and reconcilers will race against the restoration of the caches on server startup or leader election. During this time it is possible a mapper or reconciler will use the cache to look up a specific relationship and not find it. That very same reconciler may choose to then recompute some persisted resource and in effect rewind it to a prior computed state. Change - Since we are updating the behavior of the WatchList RPC, it was aligned to match that of pbsubscribe and pbpeerstream using a protobuf oneof instead of the enum+fields option. - The WatchList rpc now has 3 alternating response events: Upsert, Delete, EndOfSnapshot. When a new watch is established, the initial batch of "snapshot" Upserts will be followed by an EndOfSnapshot event before beginning the never-ending sequence of Upsert/Delete events. - Within the Controller startup code we will launch N+1 goroutines to execute WatchList queries for the watched types. The UPSERTs will be applied to the nascent cache only (no mappers will execute). - Upon witnessing the END operation, those goroutines will terminate. - When all cache priming routines complete, then the normal set of N+1 long lived watch routines will launch to officially witness all events in the system using the primed cache.
178 lines
3.7 KiB
Go
178 lines
3.7 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: BUSL-1.1
|
|
|
|
package raft_test
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"math/rand"
|
|
"net"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/credentials/insecure"
|
|
|
|
"github.com/hashicorp/consul/internal/storage"
|
|
"github.com/hashicorp/consul/internal/storage/conformance"
|
|
"github.com/hashicorp/consul/internal/storage/raft"
|
|
"github.com/hashicorp/consul/sdk/testutil"
|
|
)
|
|
|
|
func TestBackend_Conformance(t *testing.T) {
|
|
if testing.Short() {
|
|
t.Skip("too slow for testing.Short")
|
|
}
|
|
t.Run("Leader", func(t *testing.T) {
|
|
conformance.Test(t, conformance.TestOptions{
|
|
NewBackend: func(t *testing.T) storage.Backend {
|
|
leader, _ := newRaftCluster(t)
|
|
return leader
|
|
},
|
|
SupportsStronglyConsistentList: true,
|
|
})
|
|
})
|
|
|
|
t.Run("Follower", func(t *testing.T) {
|
|
conformance.Test(t, conformance.TestOptions{
|
|
NewBackend: func(t *testing.T) storage.Backend {
|
|
_, follower := newRaftCluster(t)
|
|
return follower
|
|
},
|
|
SupportsStronglyConsistentList: true,
|
|
IgnoreWatchListSnapshotOperations: true,
|
|
})
|
|
})
|
|
}
|
|
|
|
func newRaftCluster(t *testing.T) (*raft.Backend, *raft.Backend) {
|
|
t.Helper()
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
t.Cleanup(cancel)
|
|
|
|
lis, err := net.Listen("tcp", ":0")
|
|
require.NoError(t, err)
|
|
|
|
lc, err := grpc.DialContext(ctx, lis.Addr().String(), grpc.WithTransportCredentials(insecure.NewCredentials()))
|
|
require.NoError(t, err)
|
|
|
|
lh := &leaderHandle{replCh: make(chan log, 10)}
|
|
leader, err := raft.NewBackend(lh, testutil.Logger(t))
|
|
require.NoError(t, err)
|
|
lh.backend = leader
|
|
go leader.Run(ctx)
|
|
|
|
go func() {
|
|
for {
|
|
conn, err := lis.Accept()
|
|
if errors.Is(err, net.ErrClosed) {
|
|
return
|
|
}
|
|
require.NoError(t, err)
|
|
go leader.HandleConnection(conn)
|
|
}
|
|
}()
|
|
|
|
follower, err := raft.NewBackend(&followerHandle{leaderConn: lc}, testutil.Logger(t))
|
|
require.NoError(t, err)
|
|
go follower.Run(ctx)
|
|
follower.LeaderChanged()
|
|
|
|
go lh.replicate(t, follower)
|
|
|
|
return leader, follower
|
|
}
|
|
|
|
type followerHandle struct {
|
|
leaderConn *grpc.ClientConn
|
|
}
|
|
|
|
func (followerHandle) Apply([]byte) (any, error) {
|
|
return nil, errors.New("not leader")
|
|
}
|
|
|
|
func (followerHandle) IsLeader() bool {
|
|
return false
|
|
}
|
|
|
|
func (followerHandle) EnsureStrongConsistency(context.Context) error {
|
|
return errors.New("not leader")
|
|
}
|
|
|
|
func (f *followerHandle) DialLeader() (*grpc.ClientConn, error) {
|
|
return f.leaderConn, nil
|
|
}
|
|
|
|
type leaderHandle struct {
|
|
index uint64
|
|
replCh chan log
|
|
|
|
backend *raft.Backend
|
|
}
|
|
|
|
// log is a single applied operation waiting to be replicated.
type log struct {
	idx uint64 // index assigned to the operation by leaderHandle.Apply
	msg []byte // raw operation payload, passed unchanged to Backend.Apply
}
|
|
|
|
func (l *leaderHandle) Apply(msg []byte) (any, error) {
|
|
idx := atomic.AddUint64(&l.index, 1)
|
|
|
|
// Apply the operation to the leader synchronously and capture its response
|
|
// to return to the caller.
|
|
rsp := l.backend.Apply(msg, idx)
|
|
|
|
// Replicate the operation to the follower asynchronously.
|
|
l.replCh <- log{idx, msg}
|
|
|
|
if err, ok := rsp.(error); ok {
|
|
return nil, err
|
|
}
|
|
return rsp, nil
|
|
}
|
|
|
|
func (leaderHandle) IsLeader() bool {
|
|
return true
|
|
}
|
|
|
|
func (leaderHandle) EnsureStrongConsistency(context.Context) error {
|
|
return nil
|
|
}
|
|
|
|
func (leaderHandle) DialLeader() (*grpc.ClientConn, error) {
|
|
return nil, errors.New("leader should not dial itself")
|
|
}
|
|
|
|
func (h *leaderHandle) replicate(t *testing.T, follower *raft.Backend) {
|
|
doneCh := make(chan struct{})
|
|
t.Cleanup(func() { close(doneCh) })
|
|
|
|
timer := time.NewTimer(replicationLag())
|
|
defer timer.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-timer.C:
|
|
select {
|
|
case l := <-h.replCh:
|
|
_ = follower.Apply(l.msg, l.idx)
|
|
default:
|
|
}
|
|
timer.Reset(replicationLag())
|
|
case <-doneCh:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// replicationLag returns the delay to wait before replicating the next
// operation: zero in short mode, otherwise a random whole number of
// milliseconds in the range [0, 50).
func replicationLag() time.Duration {
	if !testing.Short() {
		millis := rand.Intn(50)
		return time.Duration(millis) * time.Millisecond
	}
	return 0
}
|