consul/agent/local_test.go
Frank Schroeder 58b0e153f9
Revert "agent: decouple anti-entropy from local state"
This reverts commit a842dc9c2bf00855ef93211232da36b2d91eab5b.
2017-10-23 10:08:35 +02:00

1684 lines
41 KiB
Go

package agent
import (
"reflect"
"testing"
"time"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/testutil/retry"
"github.com/hashicorp/consul/types"
"github.com/pascaldekloe/goe/verify"
)
func TestAgentAntiEntropy_Services(t *testing.T) {
t.Parallel()
a := &TestAgent{Name: t.Name(), NoInitialSync: true}
a.Start()
defer a.Shutdown()
// Register info
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
Address: "127.0.0.1",
}
// Exists both, same (noop)
var out struct{}
srv1 := &structs.NodeService{
ID: "mysql",
Service: "mysql",
Tags: []string{"master"},
Port: 5000,
}
a.state.AddService(srv1, "")
args.Service = srv1
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Exists both, different (update)
srv2 := &structs.NodeService{
ID: "redis",
Service: "redis",
Tags: []string{},
Port: 8000,
}
a.state.AddService(srv2, "")
srv2_mod := new(structs.NodeService)
*srv2_mod = *srv2
srv2_mod.Port = 9000
args.Service = srv2_mod
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Exists local (create)
srv3 := &structs.NodeService{
ID: "web",
Service: "web",
Tags: []string{},
Port: 80,
}
a.state.AddService(srv3, "")
// Exists remote (delete)
srv4 := &structs.NodeService{
ID: "lb",
Service: "lb",
Tags: []string{},
Port: 443,
}
args.Service = srv4
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Exists both, different address (update)
srv5 := &structs.NodeService{
ID: "api",
Service: "api",
Tags: []string{},
Address: "127.0.0.10",
Port: 8000,
}
a.state.AddService(srv5, "")
srv5_mod := new(structs.NodeService)
*srv5_mod = *srv5
srv5_mod.Address = "127.0.0.1"
args.Service = srv5_mod
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Exists local, in sync, remote missing (create)
srv6 := &structs.NodeService{
ID: "cache",
Service: "cache",
Tags: []string{},
Port: 11211,
}
a.state.AddService(srv6, "")
// todo(fs): data race
a.state.Lock()
a.state.serviceStatus["cache"] = syncStatus{inSync: true}
a.state.Unlock()
// Trigger anti-entropy run and wait
a.StartSync()
var services structs.IndexedNodeServices
req := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
}
retry.Run(t, func(r *retry.R) {
if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil {
r.Fatalf("err: %v", err)
}
// Make sure we sent along our node info when we synced.
id := services.NodeServices.Node.ID
addrs := services.NodeServices.Node.TaggedAddresses
meta := services.NodeServices.Node.Meta
delete(meta, structs.MetaSegmentKey) // Added later, not in config.
verify.Values(r, "node id", id, a.config.NodeID)
verify.Values(r, "tagged addrs", addrs, a.config.TaggedAddresses)
verify.Values(r, "node meta", meta, a.config.NodeMeta)
// We should have 6 services (consul included)
if len(services.NodeServices.Services) != 6 {
r.Fatalf("bad: %v", services.NodeServices.Services)
}
// All the services should match
for id, serv := range services.NodeServices.Services {
serv.CreateIndex, serv.ModifyIndex = 0, 0
switch id {
case "mysql":
if !reflect.DeepEqual(serv, srv1) {
r.Fatalf("bad: %v %v", serv, srv1)
}
case "redis":
if !reflect.DeepEqual(serv, srv2) {
r.Fatalf("bad: %#v %#v", serv, srv2)
}
case "web":
if !reflect.DeepEqual(serv, srv3) {
r.Fatalf("bad: %v %v", serv, srv3)
}
case "api":
if !reflect.DeepEqual(serv, srv5) {
r.Fatalf("bad: %v %v", serv, srv5)
}
case "cache":
if !reflect.DeepEqual(serv, srv6) {
r.Fatalf("bad: %v %v", serv, srv6)
}
case structs.ConsulServiceID:
// ignore
default:
r.Fatalf("unexpected service: %v", id)
}
}
// todo(fs): data race
a.state.RLock()
defer a.state.RUnlock()
// Check the local state
if len(a.state.services) != 5 {
r.Fatalf("bad: %v", a.state.services)
}
if len(a.state.serviceStatus) != 5 {
r.Fatalf("bad: %v", a.state.serviceStatus)
}
for name, status := range a.state.serviceStatus {
if !status.inSync {
r.Fatalf("should be in sync: %v %v", name, status)
}
}
})
// Remove one of the services
a.state.RemoveService("api")
// Trigger anti-entropy run and wait
a.StartSync()
retry.Run(t, func(r *retry.R) {
if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil {
r.Fatalf("err: %v", err)
}
// We should have 5 services (consul included)
if len(services.NodeServices.Services) != 5 {
r.Fatalf("bad: %v", services.NodeServices.Services)
}
// All the services should match
for id, serv := range services.NodeServices.Services {
serv.CreateIndex, serv.ModifyIndex = 0, 0
switch id {
case "mysql":
if !reflect.DeepEqual(serv, srv1) {
r.Fatalf("bad: %v %v", serv, srv1)
}
case "redis":
if !reflect.DeepEqual(serv, srv2) {
r.Fatalf("bad: %#v %#v", serv, srv2)
}
case "web":
if !reflect.DeepEqual(serv, srv3) {
r.Fatalf("bad: %v %v", serv, srv3)
}
case "cache":
if !reflect.DeepEqual(serv, srv6) {
r.Fatalf("bad: %v %v", serv, srv6)
}
case structs.ConsulServiceID:
// ignore
default:
r.Fatalf("unexpected service: %v", id)
}
}
// todo(fs): data race
a.state.RLock()
defer a.state.RUnlock()
// Check the local state
if len(a.state.services) != 4 {
r.Fatalf("bad: %v", a.state.services)
}
if len(a.state.serviceStatus) != 4 {
r.Fatalf("bad: %v", a.state.serviceStatus)
}
for name, status := range a.state.serviceStatus {
if !status.inSync {
r.Fatalf("should be in sync: %v %v", name, status)
}
}
})
}
func TestAgentAntiEntropy_EnableTagOverride(t *testing.T) {
t.Parallel()
a := &TestAgent{Name: t.Name(), NoInitialSync: true}
a.Start()
defer a.Shutdown()
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
Address: "127.0.0.1",
}
var out struct{}
// EnableTagOverride = true
srv1 := &structs.NodeService{
ID: "svc_id1",
Service: "svc1",
Tags: []string{"tag1"},
Port: 6100,
EnableTagOverride: true,
}
a.state.AddService(srv1, "")
srv1_mod := new(structs.NodeService)
*srv1_mod = *srv1
srv1_mod.Port = 7100
srv1_mod.Tags = []string{"tag1_mod"}
args.Service = srv1_mod
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// EnableTagOverride = false
srv2 := &structs.NodeService{
ID: "svc_id2",
Service: "svc2",
Tags: []string{"tag2"},
Port: 6200,
EnableTagOverride: false,
}
a.state.AddService(srv2, "")
srv2_mod := new(structs.NodeService)
*srv2_mod = *srv2
srv2_mod.Port = 7200
srv2_mod.Tags = []string{"tag2_mod"}
args.Service = srv2_mod
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Trigger anti-entropy run and wait
a.StartSync()
req := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
}
var services structs.IndexedNodeServices
retry.Run(t, func(r *retry.R) {
// runtime.Gosched()
if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil {
r.Fatalf("err: %v", err)
}
a.state.RLock()
defer a.state.RUnlock()
// All the services should match
for id, serv := range services.NodeServices.Services {
serv.CreateIndex, serv.ModifyIndex = 0, 0
switch id {
case "svc_id1":
if serv.ID != "svc_id1" ||
serv.Service != "svc1" ||
serv.Port != 6100 ||
!reflect.DeepEqual(serv.Tags, []string{"tag1_mod"}) {
r.Fatalf("bad: %v %v", serv, srv1)
}
case "svc_id2":
if serv.ID != "svc_id2" ||
serv.Service != "svc2" ||
serv.Port != 6200 ||
!reflect.DeepEqual(serv.Tags, []string{"tag2"}) {
r.Fatalf("bad: %v %v", serv, srv2)
}
case structs.ConsulServiceID:
// ignore
default:
r.Fatalf("unexpected service: %v", id)
}
}
// todo(fs): data race
a.state.RLock()
defer a.state.RUnlock()
for name, status := range a.state.serviceStatus {
if !status.inSync {
r.Fatalf("should be in sync: %v %v", name, status)
}
}
})
}
func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), "")
defer a.Shutdown()
{
// Single check
srv := &structs.NodeService{
ID: "mysql",
Service: "mysql",
Tags: []string{"master"},
Port: 5000,
}
a.state.AddService(srv, "")
chk := &structs.HealthCheck{
Node: a.Config.NodeName,
CheckID: "mysql",
Name: "mysql",
ServiceID: "mysql",
Status: api.HealthPassing,
}
a.state.AddCheck(chk, "")
// todo(fs): data race
func() {
a.state.RLock()
defer a.state.RUnlock()
// Sync the service once
if err := a.state.syncService("mysql"); err != nil {
t.Fatalf("err: %s", err)
}
}()
// We should have 2 services (consul included)
svcReq := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
}
var services structs.IndexedNodeServices
if err := a.RPC("Catalog.NodeServices", &svcReq, &services); err != nil {
t.Fatalf("err: %v", err)
}
if len(services.NodeServices.Services) != 2 {
t.Fatalf("bad: %v", services.NodeServices.Services)
}
// We should have one health check
chkReq := structs.ServiceSpecificRequest{
Datacenter: "dc1",
ServiceName: "mysql",
}
var checks structs.IndexedHealthChecks
if err := a.RPC("Health.ServiceChecks", &chkReq, &checks); err != nil {
t.Fatalf("err: %v", err)
}
if len(checks.HealthChecks) != 1 {
t.Fatalf("bad: %v", checks)
}
}
{
// Multiple checks
srv := &structs.NodeService{
ID: "redis",
Service: "redis",
Tags: []string{"master"},
Port: 5000,
}
a.state.AddService(srv, "")
chk1 := &structs.HealthCheck{
Node: a.Config.NodeName,
CheckID: "redis:1",
Name: "redis:1",
ServiceID: "redis",
Status: api.HealthPassing,
}
a.state.AddCheck(chk1, "")
chk2 := &structs.HealthCheck{
Node: a.Config.NodeName,
CheckID: "redis:2",
Name: "redis:2",
ServiceID: "redis",
Status: api.HealthPassing,
}
a.state.AddCheck(chk2, "")
// todo(fs): data race
func() {
a.state.RLock()
defer a.state.RUnlock()
// Sync the service once
if err := a.state.syncService("redis"); err != nil {
t.Fatalf("err: %s", err)
}
}()
// We should have 3 services (consul included)
svcReq := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
}
var services structs.IndexedNodeServices
if err := a.RPC("Catalog.NodeServices", &svcReq, &services); err != nil {
t.Fatalf("err: %v", err)
}
if len(services.NodeServices.Services) != 3 {
t.Fatalf("bad: %v", services.NodeServices.Services)
}
// We should have two health checks
chkReq := structs.ServiceSpecificRequest{
Datacenter: "dc1",
ServiceName: "redis",
}
var checks structs.IndexedHealthChecks
if err := a.RPC("Health.ServiceChecks", &chkReq, &checks); err != nil {
t.Fatalf("err: %v", err)
}
if len(checks.HealthChecks) != 2 {
t.Fatalf("bad: %v", checks)
}
}
}
var testRegisterRules = `
node "" {
policy = "write"
}
service "api" {
policy = "write"
}
service "consul" {
policy = "write"
}
`
func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) {
t.Parallel()
a := &TestAgent{Name: t.Name(), HCL: `
acl_datacenter = "dc1"
acl_master_token = "root"
acl_default_policy = "deny"
acl_enforce_version_8 = true
`, NoInitialSync: true}
a.Start()
defer a.Shutdown()
// Create the ACL
arg := structs.ACLRequest{
Datacenter: "dc1",
Op: structs.ACLSet,
ACL: structs.ACL{
Name: "User token",
Type: structs.ACLTypeClient,
Rules: testRegisterRules,
},
WriteRequest: structs.WriteRequest{
Token: "root",
},
}
var token string
if err := a.RPC("ACL.Apply", &arg, &token); err != nil {
t.Fatalf("err: %v", err)
}
// Create service (disallowed)
srv1 := &structs.NodeService{
ID: "mysql",
Service: "mysql",
Tags: []string{"master"},
Port: 5000,
}
a.state.AddService(srv1, token)
// Create service (allowed)
srv2 := &structs.NodeService{
ID: "api",
Service: "api",
Tags: []string{"foo"},
Port: 5001,
}
a.state.AddService(srv2, token)
// Trigger anti-entropy run and wait
a.StartSync()
time.Sleep(200 * time.Millisecond)
// Verify that we are in sync
{
req := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
QueryOptions: structs.QueryOptions{
Token: "root",
},
}
var services structs.IndexedNodeServices
if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil {
t.Fatalf("err: %v", err)
}
// We should have 2 services (consul included)
if len(services.NodeServices.Services) != 2 {
t.Fatalf("bad: %v", services.NodeServices.Services)
}
// All the services should match
for id, serv := range services.NodeServices.Services {
serv.CreateIndex, serv.ModifyIndex = 0, 0
switch id {
case "mysql":
t.Fatalf("should not be permitted")
case "api":
if !reflect.DeepEqual(serv, srv2) {
t.Fatalf("bad: %#v %#v", serv, srv2)
}
case structs.ConsulServiceID:
// ignore
default:
t.Fatalf("unexpected service: %v", id)
}
}
// todo(fs): data race
func() {
a.state.RLock()
defer a.state.RUnlock()
// Check the local state
if len(a.state.services) != 2 {
t.Fatalf("bad: %v", a.state.services)
}
if len(a.state.serviceStatus) != 2 {
t.Fatalf("bad: %v", a.state.serviceStatus)
}
for name, status := range a.state.serviceStatus {
if !status.inSync {
t.Fatalf("should be in sync: %v %v", name, status)
}
}
}()
}
// Now remove the service and re-sync
a.state.RemoveService("api")
a.StartSync()
time.Sleep(200 * time.Millisecond)
// Verify that we are in sync
{
req := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
QueryOptions: structs.QueryOptions{
Token: "root",
},
}
var services structs.IndexedNodeServices
if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil {
t.Fatalf("err: %v", err)
}
// We should have 1 service (just consul)
if len(services.NodeServices.Services) != 1 {
t.Fatalf("bad: %v", services.NodeServices.Services)
}
// All the services should match
for id, serv := range services.NodeServices.Services {
serv.CreateIndex, serv.ModifyIndex = 0, 0
switch id {
case "mysql":
t.Fatalf("should not be permitted")
case "api":
t.Fatalf("should be deleted")
case structs.ConsulServiceID:
// ignore
default:
t.Fatalf("unexpected service: %v", id)
}
}
// todo(fs): data race
func() {
a.state.RLock()
defer a.state.RUnlock()
// Check the local state
if len(a.state.services) != 1 {
t.Fatalf("bad: %v", a.state.services)
}
if len(a.state.serviceStatus) != 1 {
t.Fatalf("bad: %v", a.state.serviceStatus)
}
for name, status := range a.state.serviceStatus {
if !status.inSync {
t.Fatalf("should be in sync: %v %v", name, status)
}
}
}()
}
// Make sure the token got cleaned up.
if token := a.state.ServiceToken("api"); token != "" {
t.Fatalf("bad: %s", token)
}
}
func TestAgentAntiEntropy_Checks(t *testing.T) {
t.Parallel()
a := &TestAgent{Name: t.Name(), NoInitialSync: true}
a.Start()
defer a.Shutdown()
// Register info
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
Address: "127.0.0.1",
}
// Exists both, same (noop)
var out struct{}
chk1 := &structs.HealthCheck{
Node: a.Config.NodeName,
CheckID: "mysql",
Name: "mysql",
Status: api.HealthPassing,
}
a.state.AddCheck(chk1, "")
args.Check = chk1
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Exists both, different (update)
chk2 := &structs.HealthCheck{
Node: a.Config.NodeName,
CheckID: "redis",
Name: "redis",
Status: api.HealthPassing,
}
a.state.AddCheck(chk2, "")
chk2_mod := new(structs.HealthCheck)
*chk2_mod = *chk2
chk2_mod.Status = api.HealthCritical
args.Check = chk2_mod
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Exists local (create)
chk3 := &structs.HealthCheck{
Node: a.Config.NodeName,
CheckID: "web",
Name: "web",
Status: api.HealthPassing,
}
a.state.AddCheck(chk3, "")
// Exists remote (delete)
chk4 := &structs.HealthCheck{
Node: a.Config.NodeName,
CheckID: "lb",
Name: "lb",
Status: api.HealthPassing,
}
args.Check = chk4
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Exists local, in sync, remote missing (create)
chk5 := &structs.HealthCheck{
Node: a.Config.NodeName,
CheckID: "cache",
Name: "cache",
Status: api.HealthPassing,
}
a.state.AddCheck(chk5, "")
// todo(fs): data race
a.state.Lock()
a.state.checkStatus["cache"] = syncStatus{inSync: true}
a.state.Unlock()
// Trigger anti-entropy run and wait
a.StartSync()
req := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
}
var checks structs.IndexedHealthChecks
// Verify that we are in sync
retry.Run(t, func(r *retry.R) {
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
r.Fatalf("err: %v", err)
}
// We should have 5 checks (serf included)
if len(checks.HealthChecks) != 5 {
r.Fatalf("bad: %v", checks)
}
// All the checks should match
for _, chk := range checks.HealthChecks {
chk.CreateIndex, chk.ModifyIndex = 0, 0
switch chk.CheckID {
case "mysql":
if !reflect.DeepEqual(chk, chk1) {
r.Fatalf("bad: %v %v", chk, chk1)
}
case "redis":
if !reflect.DeepEqual(chk, chk2) {
r.Fatalf("bad: %v %v", chk, chk2)
}
case "web":
if !reflect.DeepEqual(chk, chk3) {
r.Fatalf("bad: %v %v", chk, chk3)
}
case "cache":
if !reflect.DeepEqual(chk, chk5) {
r.Fatalf("bad: %v %v", chk, chk5)
}
case "serfHealth":
// ignore
default:
r.Fatalf("unexpected check: %v", chk)
}
}
})
// todo(fs): data race
func() {
a.state.RLock()
defer a.state.RUnlock()
// Check the local state
if len(a.state.checks) != 4 {
t.Fatalf("bad: %v", a.state.checks)
}
if len(a.state.checkStatus) != 4 {
t.Fatalf("bad: %v", a.state.checkStatus)
}
for name, status := range a.state.checkStatus {
if !status.inSync {
t.Fatalf("should be in sync: %v %v", name, status)
}
}
}()
// Make sure we sent along our node info addresses when we synced.
{
req := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
}
var services structs.IndexedNodeServices
if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil {
t.Fatalf("err: %v", err)
}
id := services.NodeServices.Node.ID
addrs := services.NodeServices.Node.TaggedAddresses
meta := services.NodeServices.Node.Meta
delete(meta, structs.MetaSegmentKey) // Added later, not in config.
verify.Values(t, "node id", id, a.config.NodeID)
verify.Values(t, "tagged addrs", addrs, a.config.TaggedAddresses)
verify.Values(t, "node meta", meta, a.config.NodeMeta)
}
// Remove one of the checks
a.state.RemoveCheck("redis")
// Trigger anti-entropy run and wait
a.StartSync()
// Verify that we are in sync
retry.Run(t, func(r *retry.R) {
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
r.Fatalf("err: %v", err)
}
// We should have 5 checks (serf included)
if len(checks.HealthChecks) != 4 {
r.Fatalf("bad: %v", checks)
}
// All the checks should match
for _, chk := range checks.HealthChecks {
chk.CreateIndex, chk.ModifyIndex = 0, 0
switch chk.CheckID {
case "mysql":
if !reflect.DeepEqual(chk, chk1) {
r.Fatalf("bad: %v %v", chk, chk1)
}
case "web":
if !reflect.DeepEqual(chk, chk3) {
r.Fatalf("bad: %v %v", chk, chk3)
}
case "cache":
if !reflect.DeepEqual(chk, chk5) {
r.Fatalf("bad: %v %v", chk, chk5)
}
case "serfHealth":
// ignore
default:
r.Fatalf("unexpected check: %v", chk)
}
}
})
// todo(fs): data race
func() {
a.state.RLock()
defer a.state.RUnlock()
// Check the local state
if len(a.state.checks) != 3 {
t.Fatalf("bad: %v", a.state.checks)
}
if len(a.state.checkStatus) != 3 {
t.Fatalf("bad: %v", a.state.checkStatus)
}
for name, status := range a.state.checkStatus {
if !status.inSync {
t.Fatalf("should be in sync: %v %v", name, status)
}
}
}()
}
func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) {
t.Parallel()
a := &TestAgent{Name: t.Name(), HCL: `
acl_datacenter = "dc1"
acl_master_token = "root"
acl_default_policy = "deny"
acl_enforce_version_8 = true
`, NoInitialSync: true}
a.Start()
defer a.Shutdown()
// Create the ACL
arg := structs.ACLRequest{
Datacenter: "dc1",
Op: structs.ACLSet,
ACL: structs.ACL{
Name: "User token",
Type: structs.ACLTypeClient,
Rules: testRegisterRules,
},
WriteRequest: structs.WriteRequest{
Token: "root",
},
}
var token string
if err := a.RPC("ACL.Apply", &arg, &token); err != nil {
t.Fatalf("err: %v", err)
}
// Create services using the root token
srv1 := &structs.NodeService{
ID: "mysql",
Service: "mysql",
Tags: []string{"master"},
Port: 5000,
}
a.state.AddService(srv1, "root")
srv2 := &structs.NodeService{
ID: "api",
Service: "api",
Tags: []string{"foo"},
Port: 5001,
}
a.state.AddService(srv2, "root")
// Trigger anti-entropy run and wait
a.StartSync()
time.Sleep(200 * time.Millisecond)
// Verify that we are in sync
{
req := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
QueryOptions: structs.QueryOptions{
Token: "root",
},
}
var services structs.IndexedNodeServices
if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil {
t.Fatalf("err: %v", err)
}
// We should have 3 services (consul included)
if len(services.NodeServices.Services) != 3 {
t.Fatalf("bad: %v", services.NodeServices.Services)
}
// All the services should match
for id, serv := range services.NodeServices.Services {
serv.CreateIndex, serv.ModifyIndex = 0, 0
switch id {
case "mysql":
if !reflect.DeepEqual(serv, srv1) {
t.Fatalf("bad: %#v %#v", serv, srv1)
}
case "api":
if !reflect.DeepEqual(serv, srv2) {
t.Fatalf("bad: %#v %#v", serv, srv2)
}
case structs.ConsulServiceID:
// ignore
default:
t.Fatalf("unexpected service: %v", id)
}
}
// todo(fs): data race
func() {
a.state.RLock()
defer a.state.RUnlock()
// Check the local state
if len(a.state.services) != 2 {
t.Fatalf("bad: %v", a.state.services)
}
if len(a.state.serviceStatus) != 2 {
t.Fatalf("bad: %v", a.state.serviceStatus)
}
for name, status := range a.state.serviceStatus {
if !status.inSync {
t.Fatalf("should be in sync: %v %v", name, status)
}
}
}()
}
// This check won't be allowed.
chk1 := &structs.HealthCheck{
Node: a.Config.NodeName,
ServiceID: "mysql",
ServiceName: "mysql",
ServiceTags: []string{"master"},
CheckID: "mysql-check",
Name: "mysql",
Status: api.HealthPassing,
}
a.state.AddCheck(chk1, token)
// This one will be allowed.
chk2 := &structs.HealthCheck{
Node: a.Config.NodeName,
ServiceID: "api",
ServiceName: "api",
ServiceTags: []string{"foo"},
CheckID: "api-check",
Name: "api",
Status: api.HealthPassing,
}
a.state.AddCheck(chk2, token)
// Trigger anti-entropy run and wait.
a.StartSync()
time.Sleep(200 * time.Millisecond)
// Verify that we are in sync
retry.Run(t, func(r *retry.R) {
req := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
QueryOptions: structs.QueryOptions{
Token: "root",
},
}
var checks structs.IndexedHealthChecks
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
r.Fatalf("err: %v", err)
}
// We should have 2 checks (serf included)
if len(checks.HealthChecks) != 2 {
r.Fatalf("bad: %v", checks)
}
// All the checks should match
for _, chk := range checks.HealthChecks {
chk.CreateIndex, chk.ModifyIndex = 0, 0
switch chk.CheckID {
case "mysql-check":
t.Fatalf("should not be permitted")
case "api-check":
if !reflect.DeepEqual(chk, chk2) {
r.Fatalf("bad: %v %v", chk, chk2)
}
case "serfHealth":
// ignore
default:
r.Fatalf("unexpected check: %v", chk)
}
}
})
// todo(fs): data race
func() {
a.state.RLock()
defer a.state.RUnlock()
// Check the local state.
if len(a.state.checks) != 2 {
t.Fatalf("bad: %v", a.state.checks)
}
if len(a.state.checkStatus) != 2 {
t.Fatalf("bad: %v", a.state.checkStatus)
}
for name, status := range a.state.checkStatus {
if !status.inSync {
t.Fatalf("should be in sync: %v %v", name, status)
}
}
}()
// Now delete the check and wait for sync.
a.state.RemoveCheck("api-check")
a.StartSync()
time.Sleep(200 * time.Millisecond)
// Verify that we are in sync
retry.Run(t, func(r *retry.R) {
req := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
QueryOptions: structs.QueryOptions{
Token: "root",
},
}
var checks structs.IndexedHealthChecks
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
r.Fatalf("err: %v", err)
}
// We should have 1 check (just serf)
if len(checks.HealthChecks) != 1 {
r.Fatalf("bad: %v", checks)
}
// All the checks should match
for _, chk := range checks.HealthChecks {
chk.CreateIndex, chk.ModifyIndex = 0, 0
switch chk.CheckID {
case "mysql-check":
r.Fatalf("should not be permitted")
case "api-check":
r.Fatalf("should be deleted")
case "serfHealth":
// ignore
default:
r.Fatalf("unexpected check: %v", chk)
}
}
})
// todo(fs): data race
func() {
a.state.RLock()
defer a.state.RUnlock()
// Check the local state.
if len(a.state.checks) != 1 {
t.Fatalf("bad: %v", a.state.checks)
}
if len(a.state.checkStatus) != 1 {
t.Fatalf("bad: %v", a.state.checkStatus)
}
for name, status := range a.state.checkStatus {
if !status.inSync {
t.Fatalf("should be in sync: %v %v", name, status)
}
}
}()
// Make sure the token got cleaned up.
if token := a.state.CheckToken("api-check"); token != "" {
t.Fatalf("bad: %s", token)
}
}
func TestAgent_UpdateCheck_DiscardOutput(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), `
discard_check_output = true
check_update_interval = "0s" # set to "0s" since otherwise output checks are deferred
`)
defer a.Shutdown()
inSync := func(id string) bool {
a.state.Lock()
defer a.state.Unlock()
return a.state.checkStatus[types.CheckID(id)].inSync
}
// register a check
check := &structs.HealthCheck{
Node: a.Config.NodeName,
CheckID: "web",
Name: "web",
Status: api.HealthPassing,
Output: "first output",
}
if err := a.state.AddCheck(check, ""); err != nil {
t.Fatalf("bad: %s", err)
}
// wait until the check is in sync
retry.Run(t, func(r *retry.R) {
if inSync("web") {
return
}
r.FailNow()
})
// update the check with the same status but different output
// and the check should still be in sync.
a.state.UpdateCheck(check.CheckID, api.HealthPassing, "second output")
if !inSync("web") {
t.Fatal("check should be in sync")
}
// disable discarding of check output and update the check again with different
// output. Then the check should be out of sync.
a.state.SetDiscardCheckOutput(false)
a.state.UpdateCheck(check.CheckID, api.HealthPassing, "third output")
if inSync("web") {
t.Fatal("check should be out of sync")
}
}
func TestAgentAntiEntropy_Check_DeferSync(t *testing.T) {
t.Parallel()
a := &TestAgent{Name: t.Name(), HCL: `
check_update_interval = "500ms"
`, NoInitialSync: true}
a.Start()
defer a.Shutdown()
// Create a check
check := &structs.HealthCheck{
Node: a.Config.NodeName,
CheckID: "web",
Name: "web",
Status: api.HealthPassing,
Output: "",
}
a.state.AddCheck(check, "")
// Trigger anti-entropy run and wait
a.StartSync()
// Verify that we are in sync
req := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
}
var checks structs.IndexedHealthChecks
retry.Run(t, func(r *retry.R) {
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
r.Fatalf("err: %v", err)
}
if got, want := len(checks.HealthChecks), 2; got != want {
r.Fatalf("got %d health checks want %d", got, want)
}
})
// Update the check output! Should be deferred
a.state.UpdateCheck("web", api.HealthPassing, "output")
// Should not update for 500 milliseconds
time.Sleep(250 * time.Millisecond)
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
t.Fatalf("err: %v", err)
}
// Verify not updated
for _, chk := range checks.HealthChecks {
switch chk.CheckID {
case "web":
if chk.Output != "" {
t.Fatalf("early update: %v", chk)
}
}
}
// Wait for a deferred update
retry.Run(t, func(r *retry.R) {
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
r.Fatal(err)
}
// Verify updated
for _, chk := range checks.HealthChecks {
switch chk.CheckID {
case "web":
if chk.Output != "output" {
r.Fatalf("no update: %v", chk)
}
}
}
})
// Change the output in the catalog to force it out of sync.
eCopy := check.Clone()
eCopy.Output = "changed"
reg := structs.RegisterRequest{
Datacenter: a.Config.Datacenter,
Node: a.Config.NodeName,
Address: a.Config.AdvertiseAddrLAN.IP.String(),
TaggedAddresses: a.Config.TaggedAddresses,
Check: eCopy,
WriteRequest: structs.WriteRequest{},
}
var out struct{}
if err := a.RPC("Catalog.Register", &reg, &out); err != nil {
t.Fatalf("err: %s", err)
}
// Verify that the output is out of sync.
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
t.Fatalf("err: %v", err)
}
for _, chk := range checks.HealthChecks {
switch chk.CheckID {
case "web":
if chk.Output != "changed" {
t.Fatalf("unexpected update: %v", chk)
}
}
}
// Trigger anti-entropy run and wait.
a.StartSync()
time.Sleep(200 * time.Millisecond)
// Verify that the output was synced back to the agent's value.
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
t.Fatalf("err: %v", err)
}
for _, chk := range checks.HealthChecks {
switch chk.CheckID {
case "web":
if chk.Output != "output" {
t.Fatalf("missed update: %v", chk)
}
}
}
// Reset the catalog again.
if err := a.RPC("Catalog.Register", &reg, &out); err != nil {
t.Fatalf("err: %s", err)
}
// Verify that the output is out of sync.
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
t.Fatalf("err: %v", err)
}
for _, chk := range checks.HealthChecks {
switch chk.CheckID {
case "web":
if chk.Output != "changed" {
t.Fatalf("unexpected update: %v", chk)
}
}
}
// Now make an update that should be deferred.
a.state.UpdateCheck("web", api.HealthPassing, "deferred")
// Trigger anti-entropy run and wait.
a.StartSync()
time.Sleep(200 * time.Millisecond)
// Verify that the output is still out of sync since there's a deferred
// update pending.
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
t.Fatalf("err: %v", err)
}
for _, chk := range checks.HealthChecks {
switch chk.CheckID {
case "web":
if chk.Output != "changed" {
t.Fatalf("unexpected update: %v", chk)
}
}
}
// Wait for the deferred update.
retry.Run(t, func(r *retry.R) {
if err := a.RPC("Health.NodeChecks", &req, &checks); err != nil {
r.Fatal(err)
}
// Verify updated
for _, chk := range checks.HealthChecks {
switch chk.CheckID {
case "web":
if chk.Output != "deferred" {
r.Fatalf("no update: %v", chk)
}
}
}
})
}
func TestAgentAntiEntropy_NodeInfo(t *testing.T) {
t.Parallel()
nodeID := types.NodeID("40e4a748-2192-161a-0510-9bf59fe950b5")
nodeMeta := map[string]string{
"somekey": "somevalue",
}
a := &TestAgent{Name: t.Name(), HCL: `
node_id = "40e4a748-2192-161a-0510-9bf59fe950b5"
node_meta {
somekey = "somevalue"
}
`, NoInitialSync: true}
a.Start()
defer a.Shutdown()
// Register info
args := &structs.RegisterRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
Address: "127.0.0.1",
}
var out struct{}
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Trigger anti-entropy run and wait
a.StartSync()
req := structs.NodeSpecificRequest{
Datacenter: "dc1",
Node: a.Config.NodeName,
}
var services structs.IndexedNodeServices
// Wait for the sync
retry.Run(t, func(r *retry.R) {
if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil {
r.Fatalf("err: %v", err)
}
// Make sure we synced our node info - this should have ridden on the
// "consul" service sync
id := services.NodeServices.Node.ID
addrs := services.NodeServices.Node.TaggedAddresses
meta := services.NodeServices.Node.Meta
delete(meta, structs.MetaSegmentKey) // Added later, not in config.
if id != nodeID ||
!reflect.DeepEqual(addrs, a.config.TaggedAddresses) ||
!reflect.DeepEqual(meta, nodeMeta) {
r.Fatalf("bad: %v", services.NodeServices.Node)
}
})
// Blow away the catalog version of the node info
if err := a.RPC("Catalog.Register", args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Trigger anti-entropy run and wait
a.StartSync()
// Wait for the sync - this should have been a sync of just the node info
retry.Run(t, func(r *retry.R) {
if err := a.RPC("Catalog.NodeServices", &req, &services); err != nil {
r.Fatalf("err: %v", err)
}
id := services.NodeServices.Node.ID
addrs := services.NodeServices.Node.TaggedAddresses
meta := services.NodeServices.Node.Meta
delete(meta, structs.MetaSegmentKey) // Added later, not in config.
if id != nodeID ||
!reflect.DeepEqual(addrs, a.config.TaggedAddresses) ||
!reflect.DeepEqual(meta, nodeMeta) {
r.Fatalf("bad: %v", services.NodeServices.Node)
}
})
}
func TestAgentAntiEntropy_deleteService_fails(t *testing.T) {
t.Parallel()
l := new(localState)
// todo(fs): data race
l.Lock()
defer l.Unlock()
if err := l.deleteService(""); err == nil {
t.Fatalf("should have failed")
}
}
func TestAgentAntiEntropy_deleteCheck_fails(t *testing.T) {
t.Parallel()
l := new(localState)
// todo(fs): data race
l.Lock()
defer l.Unlock()
if err := l.deleteCheck(""); err == nil {
t.Fatalf("should have errored")
}
}
func TestAgent_serviceTokens(t *testing.T) {
t.Parallel()
tokens := new(token.Store)
tokens.UpdateUserToken("default")
l := NewLocalState(config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`), nil, tokens)
l.AddService(&structs.NodeService{
ID: "redis",
}, "")
// Returns default when no token is set
if token := l.ServiceToken("redis"); token != "default" {
t.Fatalf("bad: %s", token)
}
// Returns configured token
l.serviceTokens["redis"] = "abc123"
if token := l.ServiceToken("redis"); token != "abc123" {
t.Fatalf("bad: %s", token)
}
// Keeps token around for the delete
l.RemoveService("redis")
if token := l.ServiceToken("redis"); token != "abc123" {
t.Fatalf("bad: %s", token)
}
}
func TestAgent_checkTokens(t *testing.T) {
t.Parallel()
tokens := new(token.Store)
tokens.UpdateUserToken("default")
l := NewLocalState(config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`), nil, tokens)
// Returns default when no token is set
if token := l.CheckToken("mem"); token != "default" {
t.Fatalf("bad: %s", token)
}
// Returns configured token
l.checkTokens["mem"] = "abc123"
if token := l.CheckToken("mem"); token != "abc123" {
t.Fatalf("bad: %s", token)
}
// Keeps token around for the delete
l.RemoveCheck("mem")
if token := l.CheckToken("mem"); token != "abc123" {
t.Fatalf("bad: %s", token)
}
}
func TestAgent_checkCriticalTime(t *testing.T) {
t.Parallel()
l := NewLocalState(config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`), nil, new(token.Store))
svc := &structs.NodeService{ID: "redis", Service: "redis", Port: 8000}
l.AddService(svc, "")
// Add a passing check and make sure it's not critical.
checkID := types.CheckID("redis:1")
chk := &structs.HealthCheck{
Node: "node",
CheckID: checkID,
Name: "redis:1",
ServiceID: "redis",
Status: api.HealthPassing,
}
l.AddCheck(chk, "")
if checks := l.CriticalChecks(); len(checks) > 0 {
t.Fatalf("should not have any critical checks")
}
// Set it to warning and make sure that doesn't show up as critical.
l.UpdateCheck(checkID, api.HealthWarning, "")
if checks := l.CriticalChecks(); len(checks) > 0 {
t.Fatalf("should not have any critical checks")
}
// Fail the check and make sure the time looks reasonable.
l.UpdateCheck(checkID, api.HealthCritical, "")
if crit, ok := l.CriticalChecks()[checkID]; !ok {
t.Fatalf("should have a critical check")
} else if crit.CriticalFor > time.Millisecond {
t.Fatalf("bad: %#v", crit)
}
// Wait a while, then fail it again and make sure the time keeps track
// of the initial failure, and doesn't reset here.
time.Sleep(50 * time.Millisecond)
l.UpdateCheck(chk.CheckID, api.HealthCritical, "")
if crit, ok := l.CriticalChecks()[checkID]; !ok {
t.Fatalf("should have a critical check")
} else if crit.CriticalFor < 25*time.Millisecond ||
crit.CriticalFor > 75*time.Millisecond {
t.Fatalf("bad: %#v", crit)
}
// Set it passing again.
l.UpdateCheck(checkID, api.HealthPassing, "")
if checks := l.CriticalChecks(); len(checks) > 0 {
t.Fatalf("should not have any critical checks")
}
// Fail the check and make sure the time looks like it started again
// from the latest failure, not the original one.
l.UpdateCheck(checkID, api.HealthCritical, "")
if crit, ok := l.CriticalChecks()[checkID]; !ok {
t.Fatalf("should have a critical check")
} else if crit.CriticalFor > time.Millisecond {
t.Fatalf("bad: %#v", crit)
}
}
func TestAgent_AddCheckFailure(t *testing.T) {
t.Parallel()
l := NewLocalState(config.DefaultRuntimeConfig(`bind_addr = "127.0.0.1" data_dir = "dummy"`), nil, new(token.Store))
// Add a check for a service that does not exist and verify that it fails
checkID := types.CheckID("redis:1")
chk := &structs.HealthCheck{
Node: "node",
CheckID: checkID,
Name: "redis:1",
ServiceID: "redis",
Status: api.HealthPassing,
}
expectedErr := "ServiceID \"redis\" does not exist"
if err := l.AddCheck(chk, ""); err == nil || expectedErr != err.Error() {
t.Fatalf("Expected error when adding a check for a non-existent service but got %v", err)
}
}
func TestAgent_nestedPauseResume(t *testing.T) {
t.Parallel()
l := new(localState)
if l.isPaused() != false {
t.Fatal("localState should be unPaused after init")
}
l.Pause()
if l.isPaused() != true {
t.Fatal("localState should be Paused after first call to Pause()")
}
l.Pause()
if l.isPaused() != true {
t.Fatal("localState should STILL be Paused after second call to Pause()")
}
l.Resume()
if l.isPaused() != true {
t.Fatal("localState should STILL be Paused after FIRST call to Resume()")
}
l.Resume()
if l.isPaused() != false {
t.Fatal("localState should NOT be Paused after SECOND call to Resume()")
}
defer func() {
err := recover()
if err == nil {
t.Fatal("unbalanced Resume() should cause a panic()")
}
}()
l.Resume()
}
func TestAgent_sendCoordinate(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), `
sync_coordinate_interval_min = "1ms"
sync_coordinate_rate_target = 10.0
consul = {
coordinate = {
update_period = "100ms"
update_batch_size = 10
update_max_batches = 1
}
}
`)
defer a.Shutdown()
t.Logf("%d %d %s", a.consulConfig().CoordinateUpdateBatchSize, a.consulConfig().CoordinateUpdateMaxBatches,
a.consulConfig().CoordinateUpdatePeriod.String())
// Make sure the coordinate is present.
req := structs.DCSpecificRequest{
Datacenter: a.Config.Datacenter,
}
var reply structs.IndexedCoordinates
retry.Run(t, func(r *retry.R) {
if err := a.RPC("Coordinate.ListNodes", &req, &reply); err != nil {
r.Fatalf("err: %s", err)
}
if len(reply.Coordinates) != 1 {
r.Fatalf("expected a coordinate: %v", reply)
}
coord := reply.Coordinates[0]
if coord.Node != a.Config.NodeName || coord.Coord == nil {
r.Fatalf("bad: %v", coord)
}
})
}