Does a clean up pass on the Consul side.
parent 01d2452ea3
commit 86b112fe31
@@ -192,15 +192,17 @@ func Create(config *Config, logOutput io.Writer) (*Agent, error) {
 	// Start handling events
 	go agent.handleEvents()
 
+	// Start sending network coordinate to the server.
+	if config.EnableCoordinates {
+		go agent.sendCoordinate()
+	}
+
 	// Write out the PID file if necessary
 	err = agent.storePid()
 	if err != nil {
 		return nil, err
 	}
 
-	// Start sending network coordinates to servers
-	go agent.sendCoordinates()
-
 	return agent, nil
 }
 
@@ -560,12 +562,13 @@ func (a *Agent) ResumeSync() {
 	a.state.Resume()
 }
 
-// sendCoordinates starts a loop that periodically sends the local coordinate
-// to a server
-func (a *Agent) sendCoordinates() {
+// sendCoordinate is a long-running loop that periodically sends our coordinate
+// to the server. Closing the agent's shutdownCh will cause this to exit.
+func (a *Agent) sendCoordinate() {
 	for {
 		intv := aeScale(a.config.SyncCoordinateInterval, len(a.LANMembers()))
 		intv = intv + randomStagger(intv)
 
 		select {
 		case <-time.After(intv):
 			var c *coordinate.Coordinate
@@ -577,14 +580,14 @@ func (a *Agent) sendCoordinates() {
 			req := structs.CoordinateUpdateRequest{
 				Datacenter:   a.config.Datacenter,
 				Node:         a.config.NodeName,
-				Op:           structs.CoordinateSet,
+				Op:           structs.CoordinateUpdate,
 				Coord:        c,
 				WriteRequest: structs.WriteRequest{Token: a.config.ACLToken},
 			}
 
 			var reply struct{}
 			if err := a.RPC("Coordinate.Update", &req, &reply); err != nil {
-				a.logger.Printf("[ERR] agent: coordinate update error: %s", err.Error())
+				a.logger.Printf("[ERR] agent: coordinate update error: %s", err)
 			}
 		case <-a.shutdownCh:
 			return
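The loop above is a standard Go shape: scale a base interval with cluster size, add random stagger so agents don't all fire at once, and select between the timer and the shutdown channel. A self-contained sketch of the same pattern follows; `scaleInterval` is a hypothetical stand-in for Consul's `aeScale` (whose exact formula isn't part of this diff), and the millisecond timings are only there so the demo finishes quickly.

package main

import (
	"fmt"
	"math"
	"math/rand"
	"time"
)

// scaleInterval is a hypothetical stand-in for aeScale: grow the base
// interval slowly (logarithmically here) with cluster size so bigger
// clusters don't overwhelm the servers.
func scaleInterval(base time.Duration, nodes int) time.Duration {
	if nodes <= 1 {
		return base
	}
	return time.Duration(float64(base) * (1 + math.Log2(float64(nodes))))
}

// randomStagger returns a random duration in [0, intv) so agents with the
// same interval don't all fire at the same moment.
func randomStagger(intv time.Duration) time.Duration {
	return time.Duration(rand.Int63n(int64(intv)))
}

// sendLoop mirrors the shape of sendCoordinate: wait a jittered interval,
// do the work, and exit promptly when shutdownCh closes.
func sendLoop(shutdownCh <-chan struct{}, nodes func() int) {
	for {
		intv := scaleInterval(15*time.Millisecond, nodes())
		intv = intv + randomStagger(intv)

		select {
		case <-time.After(intv):
			fmt.Println("would send a coordinate update here")
		case <-shutdownCh:
			return
		}
	}
}

func main() {
	shutdownCh := make(chan struct{})
	go sendLoop(shutdownCh, func() int { return 3 })
	time.Sleep(200 * time.Millisecond)
	close(shutdownCh)
}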
@@ -368,13 +368,17 @@ type Config struct {
 	AtlasEndpoint string `mapstructure:"atlas_endpoint"`
 
 	// AEInterval controls the anti-entropy interval. This is how often
-	// the agent attempts to reconcile it's local state with the server'
+	// the agent attempts to reconcile its local state with the server's
 	// representation of our state. Defaults to every 60s.
 	AEInterval time.Duration `mapstructure:"-" json:"-"`
 
-	// SyncCoordinateInterval controls the interval for sending network coordinates
-	// to servers. Defaults to every 15s, but scales up as the number of nodes increases
-	// in the network, to prevent servers from being overwhelmed.
+	// EnableCoordinates enables features related to network coordinates.
+	EnableCoordinates bool `mapstructure:"enable_coordinates" json:"-"`
+
+	// SyncCoordinateInterval controls the interval for sending network
+	// coordinates to the server. Defaults to every 15s, but scales up as
+	// the number of nodes increases in the network, to prevent servers from
+	// being overwhelmed.
 	SyncCoordinateInterval time.Duration `mapstructure:"-" json:"-"`
 
 	// Checks holds the provided check definitions
@@ -471,6 +475,7 @@ func DefaultConfig() *Config {
 		Protocol:               consul.ProtocolVersionMax,
 		CheckUpdateInterval:    5 * time.Minute,
 		AEInterval:             time.Minute,
+		EnableCoordinates:      true,
 		SyncCoordinateInterval: 15 * time.Second,
 		ACLTTL:                 30 * time.Second,
 		ACLDownPolicy:          "extend-cache",
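With the new fields in place, the tests later in this commit shrink the sync interval so updates land quickly. A hedged sketch of that pattern as a helper (the function is hypothetical; `Config`, `DefaultConfig`, and the two fields are the names above):

// coordinateTestConfig returns an agent config tuned so coordinate updates
// happen almost immediately — the same trick TestAgent_sendCoordinate uses.
// Note SyncCoordinateInterval is tagged `mapstructure:"-"`, so it can only
// be set in code, not from a config file.
func coordinateTestConfig() *Config {
	conf := DefaultConfig()
	conf.EnableCoordinates = true // already the default, shown for clarity
	conf.SyncCoordinateInterval = 10 * time.Millisecond
	return conf
}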
@@ -387,6 +387,12 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) {
 	}
 }
 
+var testRegisterRules = `
+service "api" {
+	policy = "write"
+}
+`
+
 func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) {
 	conf := nextConfig()
 	conf.ACLDatacenter = "dc1"
@@ -796,55 +802,31 @@ func TestAgent_nestedPauseResume(t *testing.T) {
 
 }
 
-var testRegisterRules = `
-service "api" {
-	policy = "write"
-}
-`
-
-func TestAgentSendCoordinates(t *testing.T) {
-	conf1 := nextConfig()
-	conf1.SyncCoordinateInterval = 10 * time.Millisecond
-	dir1, agent1 := makeAgent(t, conf1)
-	defer os.RemoveAll(dir1)
-	defer agent1.Shutdown()
-
-	conf2 := nextConfig()
-	conf2.SyncCoordinateInterval = 10 * time.Millisecond
-	dir2, agent2 := makeAgent(t, conf2)
-	defer os.RemoveAll(dir2)
-	defer agent2.Shutdown()
+func TestAgent_sendCoordinate(t *testing.T) {
+	conf := nextConfig()
+	conf.SyncCoordinateInterval = 10 * time.Millisecond
+	conf.ConsulConfig.CoordinateUpdatePeriod = 0 * time.Millisecond
+	dir, agent := makeAgent(t, conf)
+	defer os.RemoveAll(dir)
+	defer agent.Shutdown()
 
-	agent2Addr := fmt.Sprintf("127.0.0.1:%d", agent2.config.Ports.SerfLan)
-	if _, err := agent2.JoinLAN([]string{agent2Addr}); err != nil {
-		t.Fatalf("err: %s", err)
-	}
+	testutil.WaitForLeader(t, agent.RPC, "dc1")
 
-	testutil.WaitForLeader(t, agent1.RPC, "dc1")
-
-	time.Sleep(100 * time.Millisecond)
+	// Wait a little while for an update.
+	time.Sleep(3 * conf.SyncCoordinateInterval)
 
+	// Make sure the coordinate is present.
+	req := structs.NodeSpecificRequest{
+		Datacenter: agent.config.Datacenter,
+		Node:       agent.config.NodeName,
+	}
 	var reply structs.IndexedCoordinate
-	req := structs.CoordinateGetRequest{
-		Datacenter: agent1.config.Datacenter,
-		Node:       agent1.config.NodeName,
-	}
-	if err := agent1.RPC("Coordinate.Get", &req, &reply); err != nil {
+	if err := agent.RPC("Coordinate.Get", &req, &reply); err != nil {
 		t.Fatalf("err: %v", err)
 	}
 	if reply.Coord == nil {
 		t.Fatalf("should get a coordinate")
 	}
-
-	var reply2 structs.IndexedCoordinate
-	req2 := structs.CoordinateGetRequest{
-		Datacenter: agent2.config.Datacenter,
-		Node:       agent2.config.NodeName,
-	}
-	if err := agent1.RPC("Coordinate.Get", &req2, &reply2); err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if reply2.Coord == nil {
-		t.Fatalf("should get a coordinate")
-	}
 }
@@ -380,7 +380,8 @@ func (c *Client) Stats() map[string]map[string]string {
 	return stats
 }
 
-// GetCoordinate returns the network coordinate of the receiver
+// GetCoordinate returns the network coordinate of the current node, as
+// maintained by Serf.
 func (c *Client) GetCoordinate() *coordinate.Coordinate {
 	return c.serf.GetCoordinate()
 }
@@ -206,13 +206,14 @@ type Config struct {
 	// EnableCoordinates enables features related to network coordinates.
 	EnableCoordinates bool
 
-	// CoordinateUpdatePeriod controls how long a server batches coordinate updates
-	// before applying them in a Raft transaction. A larger period leads to fewer
-	// Raft transactions, but also the stored coordinates being more stale.
+	// CoordinateUpdatePeriod controls how long a server batches coordinate
+	// updates before applying them in a Raft transaction. A larger period
+	// leads to fewer Raft transactions, but also the stored coordinates
+	// being more stale.
 	CoordinateUpdatePeriod time.Duration
 
 	// CoordinateUpdateMaxBatchSize controls the maximum number of updates a
-	// server batches before applying them in a Raft transaction
+	// server batches before applying them in a Raft transaction.
 	CoordinateUpdateMaxBatchSize int
 }
@@ -269,7 +270,7 @@ func DefaultConfig() *Config {
 		TombstoneTTLGranularity:      30 * time.Second,
 		SessionTTLMin:                10 * time.Second,
 		EnableCoordinates:            true,
-		CoordinateUpdatePeriod:       time.Duration(30) * time.Second,
+		CoordinateUpdatePeriod:       30 * time.Second,
 		CoordinateUpdateMaxBatchSize: 1000,
 	}
@@ -7,15 +7,68 @@ import (
 	"github.com/hashicorp/consul/consul/structs"
 )
 
 // Coordinate manages queries and updates for network coordinates.
 type Coordinate struct {
-	srv *Server
-	updateLastSent time.Time
-	updateBuffer []*structs.CoordinateUpdateRequest
+	// srv is a pointer back to the server.
+	srv *Server
+
+	// updateLastSent is the last time we flushed pending coordinate updates
+	// to the Raft log. CoordinateUpdatePeriod is used to control how long we
+	// wait before doing an update (that time, or hitting more than the
+	// configured CoordinateUpdateMaxBatchSize, whichever comes first).
+	updateLastSent time.Time
+
+	// updateBuffer holds the pending coordinate updates, waiting to be
+	// flushed to the Raft log.
+	updateBuffer []*structs.CoordinateUpdateRequest
+
+	// updateBufferLock manages concurrent access to updateBuffer.
 	updateBufferLock sync.Mutex
 }
 
-// GetLAN returns the the LAN coordinate of a node.
-func (c *Coordinate) GetLAN(args *structs.NodeSpecificRequest, reply *structs.IndexedCoordinate) error {
+// NewCoordinate returns a new Coordinate endpoint.
+func NewCoordinate(srv *Server) *Coordinate {
+	return &Coordinate{
+		srv:            srv,
+		updateLastSent: time.Now(),
+	}
+}
+
+// Update handles requests to update the LAN coordinate of a node.
+func (c *Coordinate) Update(args *structs.CoordinateUpdateRequest, reply *struct{}) error {
 	if done, err := c.srv.forward("Coordinate.Update", args, args, reply); done {
 		return err
 	}
+
+	c.updateBufferLock.Lock()
+	defer c.updateBufferLock.Unlock()
+	c.updateBuffer = append(c.updateBuffer, args)
+
+	// Process updates in batches to avoid tons of small transactions against
+	// the Raft log.
+	shouldFlush := time.Since(c.updateLastSent) > c.srv.config.CoordinateUpdatePeriod ||
+		len(c.updateBuffer) > c.srv.config.CoordinateUpdateMaxBatchSize
+	if shouldFlush {
+		// This transaction could take a while so we don't block here.
+		buf := c.updateBuffer
+		go func() {
+			_, err := c.srv.raftApply(structs.CoordinateRequestType, buf)
+			if err != nil {
+				c.srv.logger.Printf("[ERR] consul.coordinate: Update failed: %v", err)
+			}
+		}()
+
+		// We clear the buffer regardless of whether the raft transaction
+		// succeeded, just so the buffer doesn't keep growing without bound.
+		c.updateLastSent = time.Now()
+		c.updateBuffer = nil
+	}
+
+	return nil
+}
+
+// Get returns the coordinate of the given node in the LAN.
+func (c *Coordinate) Get(args *structs.NodeSpecificRequest, reply *structs.IndexedCoordinate) error {
+	if done, err := c.srv.forward("Coordinate.GetLAN", args, args, reply); done {
+		return err
+	}
@@ -35,61 +88,3 @@ func (c *Coordinate) GetLAN(args *structs.NodeSpecificRequest, reply *structs.IndexedCoordinate) error {
 		return err
 	})
 }
-
-// GetWAN returns the WAN coordinates of the servers in a given datacenter.
-//
-// Note that the server does not necessarily know about *all* servers in the given datacenter.
-// It just returns the coordinates of those that it knows.
-func (c *Coordinate) GetWAN(args *structs.DCSpecificRequest, reply *structs.CoordinateList) error {
-	if args.Datacenter == c.srv.config.Datacenter {
-		reply.Coords = make([]structs.Coordinate, 1)
-		reply.Coords[0] = structs.Coordinate{
-			Node:  c.srv.config.NodeName,
-			Coord: c.srv.GetWANCoordinate(),
-		}
-	} else {
-		servers := c.srv.remoteConsuls[args.Datacenter] // servers in the specified DC
-		reply.Coords = make([]structs.Coordinate, 0)
-		for i := 0; i < len(servers); i++ {
-			if coord := c.srv.serfWAN.GetCachedCoordinate(servers[i].Name); coord != nil {
-				reply.Coords = append(reply.Coords, structs.Coordinate{
-					Node:  servers[i].Name,
-					Coord: coord,
-				})
-			}
-		}
-	}
-
-	return nil
-}
-
-func flushCoordinates(c *Coordinate, buf []*structs.CoordinateUpdateRequest) {
-	_, err := c.srv.raftApply(structs.CoordinateRequestType, buf)
-	if err != nil {
-		c.srv.logger.Printf("[ERR] consul.coordinate: Update failed: %v", err)
-	}
-}
-
-// Update updates the the LAN coordinate of a node.
-func (c *Coordinate) Update(args *structs.CoordinateUpdateRequest, reply *struct{}) error {
-	if done, err := c.srv.forward("Coordinate.Update", args, args, reply); done {
-		return err
-	}
-
-	c.updateBufferLock.Lock()
-	defer c.updateBufferLock.Unlock()
-	c.updateBuffer = append(c.updateBuffer, args)
-
-	if time.Since(c.updateLastSent) > c.srv.config.CoordinateUpdatePeriod || len(c.updateBuffer) > c.srv.config.CoordinateUpdateMaxBatchSize {
-		c.srv.logger.Printf("sending update for %v", args.Node)
-		// Apply the potentially time-consuming transaction out of band
-		go flushCoordinates(c, c.updateBuffer)
-
-		// We clear the buffer regardless of whether the raft transaction succeeded, just so the
-		// buffer doesn't keep growing without bound.
-		c.updateLastSent = time.Now()
-		c.updateBuffer = nil
-	}
-
-	return nil
-}
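The new Coordinate.Update above is a classic time-or-size batcher: buffer under a lock, then flush asynchronously once CoordinateUpdatePeriod has elapsed or the buffer passes CoordinateUpdateMaxBatchSize. With the defaults from consul/config.go (30s, 1000), a quiet server commits a coordinate batch at most every 30 seconds, while a busy one commits roughly one Raft transaction per thousand updates. A self-contained sketch of the same mechanism with hypothetical names (a plain function stands in for the Raft apply):

package main

import (
	"fmt"
	"sync"
	"time"
)

// batcher accumulates items and flushes when a period elapses or the
// buffer outgrows maxBatch, whichever comes first — mirroring the
// CoordinateUpdatePeriod / CoordinateUpdateMaxBatchSize thresholds above.
type batcher struct {
	mu       sync.Mutex
	buf      []string
	lastSent time.Time
	period   time.Duration
	maxBatch int
	apply    func([]string) // stands in for the Raft apply
}

func (b *batcher) add(item string) {
	b.mu.Lock()
	defer b.mu.Unlock()
	b.buf = append(b.buf, item)

	if time.Since(b.lastSent) > b.period || len(b.buf) > b.maxBatch {
		buf := b.buf
		go b.apply(buf) // could be slow, so don't do it while holding the lock

		// Reset regardless of the apply's outcome so the buffer stays bounded.
		b.lastSent = time.Now()
		b.buf = nil
	}
}

func main() {
	b := &batcher{
		lastSent: time.Now(),
		period:   30 * time.Second,
		maxBatch: 1000,
		apply:    func(items []string) { fmt.Printf("flushing %d items\n", len(items)) },
	}
	for i := 0; i < 1500; i++ {
		b.add(fmt.Sprintf("update-%d", i))
	}
	time.Sleep(50 * time.Millisecond) // let the async flush print
}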
@@ -13,67 +13,68 @@ import (
 	"github.com/hashicorp/serf/coordinate"
 )
 
-// getRandomCoordinate generates a random coordinate.
-func getRandomCoordinate() *coordinate.Coordinate {
+// generateRandomCoordinate creates a random coordinate. This mucks with the
+// underlying structure directly, so it's not really useful for any particular
+// position in the network, but it's a good payload to send through to make
+// sure things come out the other side or get stored correctly.
+func generateRandomCoordinate() *coordinate.Coordinate {
 	config := coordinate.DefaultConfig()
-	// Randomly apply updates between n clients
-	n := 5
-	clients := make([]*coordinate.Client, n)
-	for i := 0; i < n; i++ {
-		clients[i], _ = coordinate.NewClient(config)
+	coord := coordinate.NewCoordinate(config)
+	for i := range coord.Vec {
+		coord.Vec[i] = rand.NormFloat64()
 	}
-
-	for i := 0; i < n*100; i++ {
-		k1 := rand.Intn(n)
-		k2 := rand.Intn(n)
-		if k1 == k2 {
-			continue
-		}
-		clients[k1].Update(clients[k2].GetCoordinate(), time.Duration(rand.Int63())*time.Microsecond)
-	}
-	return clients[rand.Intn(n)].GetCoordinate()
+	coord.Error = rand.NormFloat64()
+	coord.Adjustment = rand.NormFloat64()
+	return coord
 }
 
-func coordinatesEqual(a, b *coordinate.Coordinate) bool {
-	return reflect.DeepEqual(a, b)
+// verifyCoordinatesEqual will compare a and b and fail if they are not exactly
+// equal (no floating point fuzz is considered).
+func verifyCoordinatesEqual(t *testing.T, a, b *coordinate.Coordinate) {
+	if !reflect.DeepEqual(a, b) {
+		t.Fatalf("coordinates are not equal: %v != %v", a, b)
+	}
 }
 
 func TestCoordinate_Update(t *testing.T) {
 	name := fmt.Sprintf("Node %d", getPort())
 	dir1, config1 := testServerConfig(t, name)
-	config1.CoordinateUpdatePeriod = 1000 * time.Millisecond
-	defer os.RemoveAll(dir1)
-
+	config1.CoordinateUpdatePeriod = 1 * time.Second
+	config1.CoordinateUpdateMaxBatchSize = 5
 	s1, err := NewServer(config1)
 	if err != nil {
 		t.Fatal(err)
 	}
+	defer os.RemoveAll(dir1)
 	defer s1.Shutdown()
 
 	client := rpcClient(t, s1)
 	defer client.Close()
 
 	testutil.WaitForLeader(t, client.Call, "dc1")
 
 	arg1 := structs.CoordinateUpdateRequest{
 		Datacenter: "dc1",
 		Node:       "node1",
-		Op:         structs.CoordinateSet,
-		Coord:      getRandomCoordinate(),
+		Op:         structs.CoordinateUpdate,
+		Coord:      generateRandomCoordinate(),
 	}
 
 	arg2 := structs.CoordinateUpdateRequest{
 		Datacenter: "dc1",
 		Node:       "node2",
-		Op:         structs.CoordinateSet,
-		Coord:      getRandomCoordinate(),
+		Op:         structs.CoordinateUpdate,
+		Coord:      generateRandomCoordinate(),
 	}
 
 	// Send an update for the first node.
 	var out struct{}
 	if err := client.Call("Coordinate.Update", &arg1, &out); err != nil {
 		t.Fatalf("err: %v", err)
 	}
 
-	// Verify
+	// Make sure the update did not yet apply because the batching thresholds
+	// haven't yet been met.
 	state := s1.fsm.State()
 	_, d, err := state.CoordinateGet("node1")
 	if err != nil {
@@ -83,12 +84,15 @@ func TestCoordinate_Update(t *testing.T) {
 		t.Fatalf("should be nil because the update should be batched")
 	}
 
-	// Wait a while and send another update; this time the updates should be sent
+	// Wait a while and send another update. This time both updates should
+	// be applied.
 	time.Sleep(2 * s1.config.CoordinateUpdatePeriod)
 	if err := client.Call("Coordinate.Update", &arg2, &out); err != nil {
 		t.Fatalf("err: %v", err)
 	}
-	// Yield the current goroutine to allow the goroutine that sends the updates to run
+
+	// Wait a little while so the flush goroutine can run, then make sure
+	// both coordinates made it in.
 	time.Sleep(100 * time.Millisecond)
 
 	_, d, err = state.CoordinateGet("node1")
@@ -98,9 +102,7 @@ func TestCoordinate_Update(t *testing.T) {
 	if d == nil {
 		t.Fatalf("should return a coordinate but it's nil")
 	}
-	if !coordinatesEqual(d.Coord, arg1.Coord) {
-		t.Fatalf("should be equal\n%v\n%v", d.Coord, arg1.Coord)
-	}
+	verifyCoordinatesEqual(t, d.Coord, arg1.Coord)
 
 	_, d, err = state.CoordinateGet("node2")
 	if err != nil {
@@ -109,12 +111,32 @@ func TestCoordinate_Update(t *testing.T) {
 	if d == nil {
 		t.Fatalf("should return a coordinate but it's nil")
 	}
-	if !coordinatesEqual(d.Coord, arg2.Coord) {
-		t.Fatalf("should be equal\n%v\n%v", d.Coord, arg2.Coord)
-	}
+	verifyCoordinatesEqual(t, d.Coord, arg2.Coord)
+
+	// Now try spamming coordinates and make sure it flushes when the batch
+	// size is hit.
+	for i := 0; i < (s1.config.CoordinateUpdateMaxBatchSize + 1); i++ {
+		arg1.Coord = generateRandomCoordinate()
+		if err := client.Call("Coordinate.Update", &arg1, &out); err != nil {
+			t.Fatalf("err: %v", err)
+		}
+	}
+
+	// Wait a little while so the flush goroutine can run, then make sure
+	// the last coordinate update made it in.
+	time.Sleep(100 * time.Millisecond)
+
+	_, d, err = state.CoordinateGet("node1")
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+	if d == nil {
+		t.Fatalf("should return a coordinate but it's nil")
+	}
+	verifyCoordinatesEqual(t, d.Coord, arg1.Coord)
 }
 
-func TestCoordinate_GetLAN(t *testing.T) {
+func TestCoordinate_Get(t *testing.T) {
 	dir1, s1 := testServer(t)
 	defer os.RemoveAll(dir1)
 	defer s1.Shutdown()
@@ -126,103 +148,39 @@ func TestCoordinate_GetLAN(t *testing.T) {
 	arg := structs.CoordinateUpdateRequest{
 		Datacenter: "dc1",
 		Node:       "node1",
-		Op:         structs.CoordinateSet,
-		Coord:      getRandomCoordinate(),
+		Op:         structs.CoordinateUpdate,
+		Coord:      generateRandomCoordinate(),
 	}
 
+	// Send an initial update, waiting a little while for the flush goroutine
+	// to run.
 	var out struct{}
 	if err := client.Call("Coordinate.Update", &arg, &out); err != nil {
 		t.Fatalf("err: %v", err)
 	}
-	// Yield the current goroutine to allow the goroutine that sends the updates to run
 	time.Sleep(100 * time.Millisecond)
 
-	// Get via RPC
-	out2 := structs.IndexedCoordinate{}
+	// Query the coordinate via RPC.
 	arg2 := structs.NodeSpecificRequest{
 		Datacenter: "dc1",
 		Node:       "node1",
 	}
-	if err := client.Call("Coordinate.GetLAN", &arg2, &out2); err != nil {
+	coord := structs.IndexedCoordinate{}
+	if err := client.Call("Coordinate.Get", &arg2, &coord); err != nil {
 		t.Fatalf("err: %v", err)
 	}
-	if !coordinatesEqual(out2.Coord, arg.Coord) {
-		t.Fatalf("should be equal\n%v\n%v", out2.Coord, arg.Coord)
-	}
+	verifyCoordinatesEqual(t, coord.Coord, arg.Coord)
 
-	// Now let's override the original coordinate; Coordinate.Get should return
-	// the latest coordinate
-	arg.Coord = getRandomCoordinate()
+	// Send another coordinate update, waiting after for the flush.
+	arg.Coord = generateRandomCoordinate()
 	if err := client.Call("Coordinate.Update", &arg, &out); err != nil {
 		t.Fatalf("err: %v", err)
 	}
-	// Yield the current goroutine to allow the goroutine that sends the updates to run
 	time.Sleep(100 * time.Millisecond)
 
-	if err := client.Call("Coordinate.GetLAN", &arg2, &out2); err != nil {
+	// Now re-query and make sure the results are fresh.
+	if err := client.Call("Coordinate.Get", &arg2, &coord); err != nil {
 		t.Fatalf("err: %v", err)
 	}
-	if !coordinatesEqual(out2.Coord, arg.Coord) {
-		t.Fatalf("should be equal\n%v\n%v", out2.Coord, arg.Coord)
-	}
-}
-
-func TestCoordinate_GetWAN(t *testing.T) {
-	// Create 1 server in dc1, 2 servers in dc2
-	dir1, s1 := testServerDC(t, "dc1")
-	defer os.RemoveAll(dir1)
-	defer s1.Shutdown()
-
-	dir2, s2 := testServerDC(t, "dc2")
-	defer os.RemoveAll(dir2)
-	defer s2.Shutdown()
-
-	dir3, s3 := testServerDC(t, "dc2")
-	defer os.RemoveAll(dir3)
-	defer s3.Shutdown()
-
-	client := rpcClient(t, s1)
-	defer client.Close()
-	testutil.WaitForLeader(t, client.Call, "dc1")
-
-	// Try to join
-	addr := fmt.Sprintf("127.0.0.1:%d",
-		s1.config.SerfWANConfig.MemberlistConfig.BindPort)
-	if _, err := s2.JoinWAN([]string{addr}); err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if _, err := s3.JoinWAN([]string{addr}); err != nil {
-		t.Fatalf("err: %v", err)
-	}
-
-	// Check the members
-	testutil.WaitForResult(func() (bool, error) {
-		return len(s1.WANMembers()) == 3, nil
-	}, func(err error) {
-		t.Fatalf("bad len")
-	})
-
-	// Wait for coordinates to be exchanged
-	time.Sleep(s1.config.SerfWANConfig.MemberlistConfig.ProbeInterval * 2)
-
-	var coords structs.CoordinateList
-	arg := structs.DCSpecificRequest{
-		Datacenter: "dc1",
-	}
-	if err := client.Call("Coordinate.GetWAN", &arg, &coords); err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if len(coords.Coords) != 1 {
-		t.Fatalf("there is 1 server in dc1")
-	}
-
-	arg = structs.DCSpecificRequest{
-		Datacenter: "dc2",
-	}
-	if err := client.Call("Coordinate.GetWAN", &arg, &coords); err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if len(coords.Coords) != 2 {
-		t.Fatalf("there are 2 servers in dc2")
-	}
+	verifyCoordinatesEqual(t, coord.Coord, arg.Coord)
 }
@@ -253,11 +253,10 @@ func (c *consulFSM) applyCoordinateOperation(buf []byte, index uint64) interface{} {
 	if err := structs.Decode(buf, &reqs); err != nil {
 		panic(fmt.Errorf("failed to decode request: %v", err))
 	}
-	for i := 0; i < len(reqs); i++ {
-		req := reqs[i]
+	for _, req := range reqs {
 		defer metrics.MeasureSince([]string{"consul", "fsm", "coordinate", string(req.Op)}, time.Now())
 		switch req.Op {
-		case structs.CoordinateSet:
+		case structs.CoordinateUpdate:
 			coord := &structs.Coordinate{Node: req.Node, Coord: req.Coord}
 			if err := c.state.CoordinateUpdate(index, coord); err != nil {
 				return err
@@ -740,13 +740,19 @@ func TestFSM_CoordinateUpdate(t *testing.T) {
 	}
 	defer fsm.Close()
 
-	nodeName := "Node1"
-	reqs := make([]*structs.CoordinateUpdateRequest, 1)
+	// Write a batch of two coordinates.
+	reqs := make([]*structs.CoordinateUpdateRequest, 2)
 	reqs[0] = &structs.CoordinateUpdateRequest{
 		Datacenter: "dc1",
-		Node:       nodeName,
-		Op:         structs.CoordinateSet,
-		Coord:      getRandomCoordinate(),
+		Node:       "node1",
+		Op:         structs.CoordinateUpdate,
+		Coord:      generateRandomCoordinate(),
+	}
+	reqs[1] = &structs.CoordinateUpdateRequest{
+		Datacenter: "dc1",
+		Node:       "node2",
+		Op:         structs.CoordinateUpdate,
+		Coord:      generateRandomCoordinate(),
 	}
 	buf, err := structs.Encode(structs.CoordinateRequestType, reqs)
 	if err != nil {
@@ -757,17 +763,24 @@ func TestFSM_CoordinateUpdate(t *testing.T) {
 		t.Fatalf("resp: %v", resp)
 	}
 
-	// Verify key is set
-	_, d, err := fsm.state.CoordinateGet(nodeName)
+	// Read back the two coordinates to make sure they got updated.
+	_, d, err := fsm.state.CoordinateGet(reqs[0].Node)
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
 	if d == nil {
 		t.Fatalf("missing")
 	}
-	if !coordinatesEqual(reqs[0].Coord, d.Coord) {
-		t.Fatalf("wrong coordinate")
-	}
+	verifyCoordinatesEqual(t, reqs[0].Coord, d.Coord)
+
+	_, d, err = fsm.state.CoordinateGet(reqs[1].Node)
+	if err != nil {
+		t.Fatalf("err: %v", err)
+	}
+	if d == nil {
+		t.Fatalf("missing")
+	}
+	verifyCoordinatesEqual(t, reqs[1].Coord, d.Coord)
 }
 
 func TestFSM_SessionCreate_Destroy(t *testing.T) {
@@ -309,12 +309,11 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, wan bool) (*serf.Serf, error) {
 		return nil, err
 	}
 
+	// Let Serf cache coordinates for only the WAN configuration where the
+	// number of nodes should be small, and where we serve these directly
+	// from Serf because they aren't managed in the catalog.
 	conf.EnableCoordinates = s.config.EnableCoordinates
-	if conf.EnableCoordinates && wan {
-		// Cache coordinates only if it's the wan network where the number of nodes is
-		// reasonably low.
-		conf.CacheCoordinates = true
-	}
+	conf.CacheCoordinates = s.config.EnableCoordinates && wan
 
 	return serf.Create(conf)
 }
@@ -406,10 +405,7 @@ func (s *Server) setupRPC(tlsWrap tlsutil.DCWrapper) error {
 	s.endpoints.Session = &Session{s}
 	s.endpoints.Internal = &Internal{s}
 	s.endpoints.ACL = &ACL{s}
-	s.endpoints.Coordinate = &Coordinate{
-		srv:            s,
-		updateLastSent: time.Now(),
-	}
+	s.endpoints.Coordinate = NewCoordinate(s)
 
 	// Register the handlers
 	s.rpcServer.Register(s.endpoints.Status)
@@ -706,12 +702,12 @@ func (s *Server) Stats() map[string]map[string]string {
 	return stats
 }
 
-// GetLANCoordinate returns the LAN coordinate of the server
+// GetLANCoordinate returns the coordinate of the server in the LAN gossip pool.
 func (s *Server) GetLANCoordinate() *coordinate.Coordinate {
 	return s.serfLAN.GetCoordinate()
 }
 
-// GetWANCoordinate returns the WAN coordinate of the server
+// GetWANCoordinate returns the coordinate of the server in the WAN gossip pool.
 func (s *Server) GetWANCoordinate() *coordinate.Coordinate {
 	return s.serfWAN.GetCoordinate()
 }
@@ -68,7 +68,7 @@ func testServerConfig(t *testing.T, NodeName string) (string, *Config) {
 	config.ReconcileInterval = 100 * time.Millisecond
 
 	config.EnableCoordinates = true
-	config.CoordinateUpdatePeriod = 0 // make updates instant
+	config.CoordinateUpdatePeriod = 0 * time.Millisecond
 	return dir, config
 }
@@ -620,28 +620,36 @@ type ACLPolicy struct {
 	QueryMeta
 }
 
-// Coordinate stores a mapping from a node to its network coordinate
+// Coordinate stores a node name with its associated network coordinate. This is
+// used when querying WAN coordinates.
 type Coordinate struct {
 	Node  string
 	Coord *coordinate.Coordinate
 }
 
+// CoordinateList is a list of Coordinate structs. This is used when querying WAN
+// coordinates.
 type CoordinateList struct {
 	Coords []Coordinate
 }
 
+// IndexedCoordinate is used to represent a single node's coordinate from the state
+// store. This is used when querying LAN coordinates.
 type IndexedCoordinate struct {
 	Coord *coordinate.Coordinate
 	QueryMeta
}
 
+// CoordinateOp is used for encoding coordinate-related RPC requests.
 type CoordinateOp string
 
 const (
-	CoordinateSet CoordinateOp = "set"
+	// CoordinateUpdate is used to update a node's coordinates in the catalog.
+	CoordinateUpdate CoordinateOp = "update"
 )
 
-// CoordinateUpdateRequest is used to update the network coordinate of a given node
+// CoordinateUpdateRequest is used to update the network coordinate of a given
+// node.
 type CoordinateUpdateRequest struct {
 	Datacenter string
 	Node       string
@@ -650,6 +658,7 @@ type CoordinateUpdateRequest struct {
 	WriteRequest
 }
 
+// RequestDatacenter returns the datacenter for a given update request.
 func (c *CoordinateUpdateRequest) RequestDatacenter() string {
 	return c.Datacenter
 }
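Putting the request types to work, the agent-side call earlier in this diff boils down to the sketch below. The request shape, the CoordinateUpdate op, and the "Coordinate.Update" method name come from this commit; the helper itself is hypothetical and `rpc` stands in for the agent's RPC method.

// sendCoordinateUpdate shows the request shape for the new endpoint.
func sendCoordinateUpdate(rpc func(method string, args, reply interface{}) error,
	coord *coordinate.Coordinate, token string) error {
	req := structs.CoordinateUpdateRequest{
		Datacenter:   "dc1", // RequestDatacenter() above lets the server route this to dc1
		Node:         "node1",
		Op:           structs.CoordinateUpdate,
		Coord:        coord, // a *coordinate.Coordinate from Serf
		WriteRequest: structs.WriteRequest{Token: token},
	}
	var reply struct{}
	return rpc("Coordinate.Update", &req, &reply)
}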