From fb8900156ad8dec15d4a72073b4802a4801390f2 Mon Sep 17 00:00:00 2001
From: James Phillips
Date: Fri, 16 Oct 2015 00:03:16 -0700
Subject: [PATCH] Adds a "consul rtt" command.

---
 command/rtt.go                                | 156 +++++++++++++++++
 command/rtt_test.go                           | 162 ++++++++++++++++++
 commands.go                                   |   6 +
 .../source/docs/commands/index.html.markdown  |   1 +
 .../source/docs/commands/rtt.html.markdown    |  54 ++++++
 5 files changed, 379 insertions(+)
 create mode 100644 command/rtt.go
 create mode 100644 command/rtt_test.go
 create mode 100644 website/source/docs/commands/rtt.html.markdown

diff --git a/command/rtt.go b/command/rtt.go
new file mode 100644
index 0000000000..731016a66b
--- /dev/null
+++ b/command/rtt.go
@@ -0,0 +1,156 @@
+package command
+
+import (
+	"flag"
+	"fmt"
+	"strings"
+
+	"github.com/hashicorp/consul/api"
+	"github.com/hashicorp/serf/coordinate"
+	"github.com/mitchellh/cli"
+)
+
+// RttCommand is a Command implementation that allows users to query the
+// estimated round trip time between nodes using network coordinates.
+type RttCommand struct {
+	Ui cli.Ui
+}
+
+// Help returns the long-form usage text for the "consul rtt" command,
+// trimmed of surrounding whitespace.
+func (c *RttCommand) Help() string {
+	helpText := `
+Usage: consul rtt [options] node1 node2
+
+  Estimates the round trip time between two nodes using Consul's network
+  coordinate model of the cluster.
+
+  By default, the two nodes are assumed to be nodes in the local datacenter
+  and the LAN coordinates are used. If the -wan option is given, then the WAN
+  coordinates are used, and the node names must be prefixed by the datacenter
+  and a period (eg. "dc1.server").
+
+  It is not possible to measure between LAN coordinates and WAN coordinates
+  because they are maintained by independent Serf gossip pools, so they are
+  not compatible.
+
+  The two node names are required. Note that these are node names as known to
+  Consul as "consul members" would show, not IP addresses.
+
+Options:
+
+  -wan                       Use WAN coordinates instead of LAN coordinates.
+  -http-addr=127.0.0.1:8500  HTTP address of the Consul agent.
+`
+	return strings.TrimSpace(helpText)
+}
+
+// Run parses args, looks up the coordinates of the two requested nodes via the
+// agent's HTTP API, and prints the estimated round trip time between them. It
+// returns 0 on success and 1 on a usage error, an API error, or when either
+// node has no known coordinate.
+func (c *RttCommand) Run(args []string) int {
+	var wan bool
+
+	cmdFlags := flag.NewFlagSet("rtt", flag.ContinueOnError)
+	cmdFlags.Usage = func() { c.Ui.Output(c.Help()) }
+
+	cmdFlags.BoolVar(&wan, "wan", false, "wan")
+	httpAddr := HTTPAddrFlag(cmdFlags)
+	if err := cmdFlags.Parse(args); err != nil {
+		return 1
+	}
+
+	// They must provide a pair of nodes.
+	nodes := cmdFlags.Args()
+	if len(nodes) != 2 {
+		c.Ui.Error("Two node names must be specified")
+		c.Ui.Error("")
+		c.Ui.Error(c.Help())
+		return 1
+	}
+
+	// Create the HTTP client.
+	conf := api.DefaultConfig()
+	conf.Address = *httpAddr
+	client, err := api.NewClient(conf)
+	if err != nil {
+		c.Ui.Error(fmt.Sprintf("Error connecting to Consul agent: %s", err))
+		return 1
+	}
+	coordClient := client.Coordinate()
+
+	var source string
+	var coord1, coord2 *coordinate.Coordinate
+	if wan {
+		// Parse the input nodes. Only the first period separates the
+		// datacenter from the node, so node names that themselves contain
+		// periods (such as fully-qualified hostnames) are accepted.
+		parts1 := strings.SplitN(nodes[0], ".", 2)
+		parts2 := strings.SplitN(nodes[1], ".", 2)
+		if len(parts1) != 2 || len(parts2) != 2 {
+			c.Ui.Error("Node names must be specified as <datacenter>.<node name> with -wan")
+			return 1
+		}
+		dc1, node1 := parts1[0], parts1[1]
+		dc2, node2 := parts2[0], parts2[1]
+
+		// Pull all the WAN coordinates.
+		dcs, err := coordClient.Datacenters()
+		if err != nil {
+			c.Ui.Error(fmt.Sprintf("Error getting coordinates: %s", err))
+			return 1
+		}
+
+		// See if the requested nodes are in there.
+		for _, dc := range dcs {
+			for _, entry := range dc.Coordinates {
+				if dc.Datacenter == dc1 && entry.Node == node1 {
+					coord1 = entry.Coord
+				}
+				if dc.Datacenter == dc2 && entry.Node == node2 {
+					coord2 = entry.Coord
+				}
+			}
+		}
+		source = "WAN"
+	} else {
+		// Pull all the LAN coordinates.
+		entries, _, err := coordClient.Nodes(nil)
+		if err != nil {
+			c.Ui.Error(fmt.Sprintf("Error getting coordinates: %s", err))
+			return 1
+		}
+
+		// See if the requested nodes are in there.
+		for _, entry := range entries {
+			if entry.Node == nodes[0] {
+				coord1 = entry.Coord
+			}
+			if entry.Node == nodes[1] {
+				coord2 = entry.Coord
+			}
+		}
+		source = "LAN"
+	}
+
+	// Make sure we found both coordinates.
+	if coord1 == nil {
+		c.Ui.Error(fmt.Sprintf("Could not find a coordinate for node %q", nodes[0]))
+		return 1
+	}
+	if coord2 == nil {
+		c.Ui.Error(fmt.Sprintf("Could not find a coordinate for node %q", nodes[1]))
+		return 1
+	}
+
+	// Report the round trip time.
+	dist := coord1.DistanceTo(coord2).Seconds()
+	c.Ui.Output(fmt.Sprintf("Estimated %s <-> %s rtt=%.3f ms (using %s coordinates)", nodes[0], nodes[1], dist*1000.0, source))
+	return 0
+}
+
+// Synopsis returns the one-line summary of the command.
+func (c *RttCommand) Synopsis() string {
+	return "Estimates round trip times between nodes"
+}
diff --git a/command/rtt_test.go b/command/rtt_test.go
new file mode 100644
index 0000000000..4722986792
--- /dev/null
+++ b/command/rtt_test.go
@@ -0,0 +1,162 @@
+package command
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/hashicorp/consul/command/agent"
+	"github.com/hashicorp/consul/consul/structs"
+	"github.com/hashicorp/serf/coordinate"
+	"github.com/mitchellh/cli"
+)
+
+func TestRttCommand_Implements(t *testing.T) {
+	var _ cli.Command = &RttCommand{}
+}
+
+func TestRttCommand_Run_BadArgs(t *testing.T) {
+	ui := new(cli.MockUi)
+	c := &RttCommand{Ui: ui}
+
+	if code := c.Run([]string{}); code != 1 {
+		t.Fatalf("expected return code 1, got %d", code)
+	}
+
+	if code := c.Run([]string{"node1"}); code != 1 {
+		t.Fatalf("expected return code 1, got %d", code)
+	}
+
+	if code := c.Run([]string{"node1", "node2", "node3"}); code != 1 {
+		t.Fatalf("expected return code 1, got %d", code)
+	}
+
+	if code := c.Run([]string{"-wan", "node1", "node2"}); code != 1 {
+		t.Fatalf("expected return code 1, got %d", code)
+	}
+
+	if code := c.Run([]string{"-wan", "dc1.node1", "node2"}); code != 1 {
+		t.Fatalf("expected return code 1, got %d", code)
+	}
+
+	if code := c.Run([]string{"-wan", "node1", "dc1.node2"}); code != 1 {
+		t.Fatalf("expected return code 1, got %d", code)
+	}
+}
+
+func TestRttCommand_Run_LAN(t *testing.T) {
+	updatePeriod := 10 * time.Millisecond
+	a := testAgentWithConfig(t, func(c *agent.Config) {
+		c.ConsulConfig.CoordinateUpdatePeriod = updatePeriod
+	})
+	defer a.Shutdown()
+	waitForLeader(t, a.httpAddr)
+
+	// Inject some known coordinates.
+	c1 := coordinate.NewCoordinate(coordinate.DefaultConfig())
+	c2 := c1.Clone()
+	c2.Vec[0] = 0.123
+
+	req1 := structs.CoordinateUpdateRequest{
+		Datacenter: a.config.Datacenter,
+		Node:       a.config.NodeName,
+		Coord:      c1,
+	}
+	var reply struct{}
+	if err := a.agent.RPC("Coordinate.Update", &req1, &reply); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	req2 := structs.CoordinateUpdateRequest{
+		Datacenter: a.config.Datacenter,
+		Node:       "dogs",
+		Coord:      c2,
+	}
+	if err := a.agent.RPC("Coordinate.Update", &req2, &reply); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Wait for the updates to get flushed to the data store.
+	time.Sleep(2 * updatePeriod)
+
+	ui := new(cli.MockUi)
+	c := &RttCommand{Ui: ui}
+
+	// Try two known nodes.
+	func() {
+		args := []string{
+			"-http-addr=" + a.httpAddr,
+			a.config.NodeName,
+			"dogs",
+		}
+		code := c.Run(args)
+		if code != 0 {
+			t.Fatalf("bad: %d: %#v", code, ui.ErrorWriter.String())
+		}
+
+		// Make sure the proper RTT was reported in the output.
+		distStr := fmt.Sprintf("%.3f ms", c1.DistanceTo(c2).Seconds()*1000.0)
+		if !strings.Contains(ui.OutputWriter.String(), distStr) {
+			t.Fatalf("bad: %#v", ui.OutputWriter.String())
+		}
+	}()
+
+	// Try an unknown node.
+	func() {
+		args := []string{
+			"-http-addr=" + a.httpAddr,
+			a.config.NodeName,
+			"nope",
+		}
+		code := c.Run(args)
+		if code != 1 {
+			t.Fatalf("bad: %d: %#v", code, ui.ErrorWriter.String())
+		}
+	}()
+}
+
+func TestRttCommand_Run_WAN(t *testing.T) {
+	a := testAgent(t)
+	defer a.Shutdown()
+	waitForLeader(t, a.httpAddr)
+
+	ui := new(cli.MockUi)
+	c := &RttCommand{Ui: ui}
+
+	node := fmt.Sprintf("%s.%s", a.config.Datacenter, a.config.NodeName)
+
+	// We can't easily inject WAN coordinates, so we will just query the
+	// node with itself.
+	func() {
+		args := []string{
+			"-http-addr=" + a.httpAddr,
+			"-wan",
+			node,
+			node,
+		}
+		code := c.Run(args)
+		if code != 0 {
+			t.Fatalf("bad: %d: %#v", code, ui.ErrorWriter.String())
+		}
+
+		// Make sure there was some kind of RTT reported in the output.
+		if !strings.Contains(ui.OutputWriter.String(), "rtt=") {
+			t.Fatalf("bad: %#v", ui.OutputWriter.String())
+		}
+	}()
+
+	// Try an unknown node.
+	func() {
+		args := []string{
+			"-http-addr=" + a.httpAddr,
+			"-wan",
+			node,
+			"dc1.nope",
+		}
+		code := c.Run(args)
+		if code != 1 {
+			t.Fatalf("bad: %d: %#v", code, ui.ErrorWriter.String())
+		}
+	}()
+}
diff --git a/commands.go b/commands.go
index eca071390f..592cad2361 100644
--- a/commands.go
+++ b/commands.go
@@ -114,6 +114,12 @@ func init() {
 			}, nil
 		},
 
+		"rtt": func() (cli.Command, error) {
+			return &command.RttCommand{
+				Ui: ui,
+			}, nil
+		},
+
 		"version": func() (cli.Command, error) {
 			ver := Version
 			rel := VersionPrerelease
diff --git a/website/source/docs/commands/index.html.markdown b/website/source/docs/commands/index.html.markdown
index 0b5c621369..d888d8996f 100644
--- a/website/source/docs/commands/index.html.markdown
+++ b/website/source/docs/commands/index.html.markdown
@@ -39,6 +39,7 @@ Available commands are:
     members        Lists the members of a Consul cluster
     monitor        Stream logs from a Consul agent
     reload         Triggers the agent to reload configuration files
+    rtt            Estimates round trip times between nodes
     version        Prints
the Consul version watch Watch for changes in Consul ``` diff --git a/website/source/docs/commands/rtt.html.markdown b/website/source/docs/commands/rtt.html.markdown new file mode 100644 index 0000000000..3eebb704e5 --- /dev/null +++ b/website/source/docs/commands/rtt.html.markdown @@ -0,0 +1,54 @@ +--- +layout: "docs" +page_title: "Commands: RTT" +sidebar_current: "docs-commands-rtt" +description: > + The `rtt` command estimates the network round trip time between two nodes using Consul's network coordinate model of the cluster. +--- + +# Consul RTT + +Command: `consul rtt` + +The `rtt` command estimates the network round trip time between two nodes using +Consul's network coordinate model of the cluster. While contacting nodes as part +of its normal gossip protocol, Consul builds up a set of network coordinates for +all the nodes in the local datacenter (the LAN pool) and remote datacenters (the WAN +pool). Agents forward these to the servers and once the coordinates for two nodes +are known, it's possible to estimate the network round trip time between them using +a simple calculation. + +It is not possible to measure between LAN coordinates and WAN coordinates +because they are maintained by independent Serf gossip pools, so they are +not compatible. + +## Usage + +Usage: `consul rtt [options] node1 node2` + +The two node names are required. Note that these are node names as known to +Consul as `consul members` would show, not IP addresses. + +The list of available flags is: + +* `-wan` - Instructs the command to use WAN coordinates instead of LAN + coordinates. If the -wan option is given, then the node names must be prefixed + by the datacenter and a period (eg. "dc1.server"). By default, the two nodes are + assumed to be nodes in the local datacenter and the LAN coordinates are used. + +* `-http-addr` - Address to the HTTP server of the agent you want to contact + to send this command. 
If this isn't specified, the command will contact + "127.0.0.1:8500" which is the default HTTP address of a Consul agent. + +## Output + +If coordinates are available, the command will print the estimated round trip +time between the given nodes: + +``` +$ consul rtt n1 n2 +Estimated n1 <-> n2 rtt=0.610 ms (using LAN coordinates) + +$ consul rtt -wan dc1.n1 dc2.n2 +Estimated dc1.n1 <-> dc2.n2 rtt=1.275 ms (using WAN coordinates) +```