debug: use human readable dates for filenames

The unix timestamps that were used make the debug data a little bit more
difficult to consume. By using human readable dates we can easily see
when the profile data was collected.

This commit also improves the test coverage. Two test cases are removed
and the assertions from those cases are moved to TestDebugCommand.

Now TestDebugCommand is able to validate the contents of all files. This
change reduces the test runtime of the command/debug package by almost
50%. It also makes much more strict assertions about the contents by
using gotest.tools/v3/fs.
This commit is contained in:
Daniel Nephin 2021-08-06 17:17:25 -04:00
parent 2f8d0e12cf
commit 797ee061e4
2 changed files with 121 additions and 206 deletions

View File

@ -85,6 +85,9 @@ type cmd struct {
// validateTiming can be used to skip validation of interval, duration. This
// is primarily useful for testing
validateTiming bool
// timeNow is a shim for testing, it is used to generate the time used in
// file paths.
timeNow func() time.Time
}
// debugIndex is used to manage the summary of all data recorded
@ -102,10 +105,16 @@ type debugIndex struct {
Targets []string
}
// timeDateformat is a modified version of time.RFC3339 which replaces colons with
// hyphens. This is to make it more convenient to untar these files, because
// tar assumes colons indicate the file is on a remote host, unless --force-local
// is used.
const timeDateFormat = "2006-01-02T15-04-05Z0700"
func (c *cmd) init() {
c.flags = flag.NewFlagSet("", flag.ContinueOnError)
defaultFilename := fmt.Sprintf("consul-debug-%d", time.Now().Unix())
defaultFilename := fmt.Sprintf("consul-debug-%v", time.Now().Format(timeDateFormat))
c.flags.Var((*flags.AppendSliceValue)(&c.capture), "capture",
fmt.Sprintf("One or more types of information to capture. This can be used "+
@ -130,6 +139,9 @@ func (c *cmd) init() {
c.help = flags.Usage(help, c.flags)
c.validateTiming = true
c.timeNow = func() time.Time {
return time.Now().UTC()
}
}
func (c *cmd) Run(args []string) int {
@ -208,7 +220,7 @@ func (c *cmd) Run(args []string) int {
Duration: c.duration.String(),
Targets: c.capture,
}
if err := writeJSONFile(fmt.Sprintf("%s/index.json", c.output), index); err != nil {
if err := writeJSONFile(filepath.Join(c.output, "index.json"), index); err != nil {
c.UI.Error(fmt.Sprintf("Error creating index document: %v", err))
return 1
}
@ -334,13 +346,13 @@ func (c *cmd) captureInterval(ctx context.Context) error {
durationChn := time.After(c.duration)
intervalCount := 0
c.UI.Output(fmt.Sprintf("Beginning capture interval %s (%d)", time.Now().Local(), intervalCount))
c.UI.Output(fmt.Sprintf("Beginning capture interval %s (%d)", time.Now().UTC(), intervalCount))
err := captureShortLived(c)
if err != nil {
return err
}
c.UI.Output(fmt.Sprintf("Capture successful %s (%d)", time.Now().Local(), intervalCount))
c.UI.Output(fmt.Sprintf("Capture successful %s (%d)", time.Now().UTC(), intervalCount))
for {
select {
case t := <-intervalChn.C:
@ -349,7 +361,7 @@ func (c *cmd) captureInterval(ctx context.Context) error {
if err != nil {
return err
}
c.UI.Output(fmt.Sprintf("Capture successful %s (%d)", t.Local(), intervalCount))
c.UI.Output(fmt.Sprintf("Capture successful %s (%d)", t.UTC(), intervalCount))
case <-durationChn:
intervalChn.Stop()
return nil
@ -361,43 +373,32 @@ func (c *cmd) captureInterval(ctx context.Context) error {
func captureShortLived(c *cmd) error {
g := new(errgroup.Group)
timestamp := time.Now().Local().Unix()
timestampDir, err := c.createTimestampDir(timestamp)
dir, err := makeIntervalDir(c.output, c.timeNow())
if err != nil {
return err
}
if c.captureTarget(targetProfiles) {
g.Go(func() error {
return c.captureHeap(timestampDir)
return c.captureHeap(dir)
})
g.Go(func() error {
return c.captureGoRoutines(timestampDir)
return c.captureGoRoutines(dir)
})
}
return g.Wait()
}
func (c *cmd) createTimestampDir(timestamp int64) (string, error) {
// Make the directory that will store all captured data
// for this interval
timestampDir := fmt.Sprintf("%s/%d", c.output, timestamp)
err := os.MkdirAll(timestampDir, 0755)
if err != nil {
return "", err
func makeIntervalDir(base string, now time.Time) (string, error) {
dir := filepath.Join(base, now.Format(timeDateFormat))
if err := os.MkdirAll(dir, 0755); err != nil {
return "", fmt.Errorf("failed to create output directory %v: %w", dir, err)
}
return timestampDir, nil
return dir, nil
}
func (c *cmd) captureLongRunning(ctx context.Context) error {
timestamp := time.Now().Local().Unix()
timestampDir, err := c.createTimestampDir(timestamp)
if err != nil {
return err
}
g := new(errgroup.Group)
// Capture a profile/trace with a minimum of 1s
s := c.duration.Seconds()
@ -407,42 +408,42 @@ func (c *cmd) captureLongRunning(ctx context.Context) error {
if c.captureTarget(targetProfiles) {
g.Go(func() error {
// use ctx without a timeout to allow the profile to finish sending
return c.captureProfile(ctx, s, timestampDir)
return c.captureProfile(ctx, s)
})
g.Go(func() error {
// use ctx without a timeout to allow the trace to finish sending
return c.captureTrace(ctx, s, timestampDir)
return c.captureTrace(ctx, s)
})
}
if c.captureTarget(targetLogs) {
g.Go(func() error {
ctx, cancel := context.WithTimeout(ctx, c.duration)
defer cancel()
return c.captureLogs(ctx, timestampDir)
return c.captureLogs(ctx)
})
}
if c.captureTarget(targetMetrics) {
g.Go(func() error {
ctx, cancel := context.WithTimeout(ctx, c.duration)
defer cancel()
return c.captureMetrics(ctx, timestampDir)
return c.captureMetrics(ctx)
})
}
return g.Wait()
}
func (c *cmd) captureGoRoutines(timestampDir string) error {
func (c *cmd) captureGoRoutines(outputDir string) error {
gr, err := c.client.Debug().Goroutine()
if err != nil {
return fmt.Errorf("failed to collect goroutine profile: %w", err)
}
return ioutil.WriteFile(fmt.Sprintf("%s/goroutine.prof", timestampDir), gr, 0644)
return ioutil.WriteFile(filepath.Join(outputDir, "goroutine.prof"), gr, 0644)
}
func (c *cmd) captureTrace(ctx context.Context, s float64, timestampDir string) error {
func (c *cmd) captureTrace(ctx context.Context, s float64) error {
prof, err := c.client.Debug().PProf(ctx, "trace", int(s))
if err != nil {
return fmt.Errorf("failed to collect cpu profile: %w", err)
@ -450,7 +451,7 @@ func (c *cmd) captureTrace(ctx context.Context, s float64, timestampDir string)
defer prof.Close()
r := bufio.NewReader(prof)
fh, err := os.Create(fmt.Sprintf("%s/trace.out", timestampDir))
fh, err := os.Create(filepath.Join(c.output, "trace.out"))
if err != nil {
return err
}
@ -459,7 +460,7 @@ func (c *cmd) captureTrace(ctx context.Context, s float64, timestampDir string)
return err
}
func (c *cmd) captureProfile(ctx context.Context, s float64, timestampDir string) error {
func (c *cmd) captureProfile(ctx context.Context, s float64) error {
prof, err := c.client.Debug().PProf(ctx, "profile", int(s))
if err != nil {
return fmt.Errorf("failed to collect cpu profile: %w", err)
@ -467,7 +468,7 @@ func (c *cmd) captureProfile(ctx context.Context, s float64, timestampDir string
defer prof.Close()
r := bufio.NewReader(prof)
fh, err := os.Create(fmt.Sprintf("%s/profile.prof", timestampDir))
fh, err := os.Create(filepath.Join(c.output, "profile.prof"))
if err != nil {
return err
}
@ -476,23 +477,23 @@ func (c *cmd) captureProfile(ctx context.Context, s float64, timestampDir string
return err
}
func (c *cmd) captureHeap(timestampDir string) error {
func (c *cmd) captureHeap(outputDir string) error {
heap, err := c.client.Debug().Heap()
if err != nil {
return fmt.Errorf("failed to collect heap profile: %w", err)
}
return ioutil.WriteFile(fmt.Sprintf("%s/heap.prof", timestampDir), heap, 0644)
return ioutil.WriteFile(filepath.Join(outputDir, "heap.prof"), heap, 0644)
}
func (c *cmd) captureLogs(ctx context.Context, timestampDir string) error {
func (c *cmd) captureLogs(ctx context.Context) error {
logCh, err := c.client.Agent().Monitor("DEBUG", ctx.Done(), nil)
if err != nil {
return err
}
// Create the log file for writing
f, err := os.Create(fmt.Sprintf("%s/%s", timestampDir, "consul.log"))
f, err := os.Create(filepath.Join(c.output, "consul.log"))
if err != nil {
return err
}
@ -513,15 +514,14 @@ func (c *cmd) captureLogs(ctx context.Context, timestampDir string) error {
}
}
func (c *cmd) captureMetrics(ctx context.Context, timestampDir string) error {
func (c *cmd) captureMetrics(ctx context.Context) error {
stream, err := c.client.Agent().MetricsStream(ctx)
if err != nil {
return err
}
defer stream.Close()
filename := fmt.Sprintf("%s/%s.json", timestampDir, "metrics")
fh, err := os.Create(filename)
fh, err := os.Create(filepath.Join(c.output, "metrics.json"))
if err != nil {
return fmt.Errorf("failed to create metrics file: %w", err)
}

View File

@ -3,10 +3,11 @@ package debug
import (
"archive/tar"
"bufio"
"bytes"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"regexp"
@ -18,6 +19,7 @@ import (
"github.com/mitchellh/cli"
"github.com/stretchr/testify/require"
"gotest.tools/v3/assert"
"gotest.tools/v3/assert/cmp"
"gotest.tools/v3/fs"
"github.com/hashicorp/consul/agent"
@ -49,12 +51,17 @@ func TestDebugCommand(t *testing.T) {
cmd := New(ui)
cmd.validateTiming = false
it := &incrementalTime{
base: time.Date(2021, 7, 8, 9, 10, 11, 0, time.UTC),
}
cmd.timeNow = it.Now
outputPath := fmt.Sprintf("%s/debug", testDir)
args := []string{
"-http-addr=" + a.HTTPAddr(),
"-output=" + outputPath,
"-duration=100ms",
"-interval=50ms",
"-duration=2s",
"-interval=1s",
"-archive=false",
}
@ -64,17 +71,75 @@ func TestDebugCommand(t *testing.T) {
expected := fs.Expected(t,
fs.WithDir("debug",
fs.WithFile("agent.json", "", fs.MatchAnyFileContent),
fs.WithFile("host.json", "", fs.MatchAnyFileContent),
fs.WithFile("index.json", "", fs.MatchAnyFileContent),
fs.WithFile("members.json", "", fs.MatchAnyFileContent),
// TODO: make the sub-directory names predictable)
fs.WithFile("index.json", "", fs.MatchFileContent(validIndexJSON)),
fs.WithFile("agent.json", "", fs.MatchFileContent(validJSON)),
fs.WithFile("host.json", "", fs.MatchFileContent(validJSON)),
fs.WithFile("members.json", "", fs.MatchFileContent(validJSON)),
fs.WithFile("metrics.json", "", fs.MatchAnyFileContent),
fs.WithFile("consul.log", "", fs.MatchFileContent(validLogFile)),
fs.WithFile("profile.prof", "", fs.MatchFileContent(validProfileData)),
fs.WithFile("trace.out", "", fs.MatchAnyFileContent),
fs.WithDir("2021-07-08T09-10-12Z",
fs.WithFile("goroutine.prof", "", fs.MatchFileContent(validProfileData)),
fs.WithFile("heap.prof", "", fs.MatchFileContent(validProfileData))),
fs.WithDir("2021-07-08T09-10-13Z",
fs.WithFile("goroutine.prof", "", fs.MatchFileContent(validProfileData)),
fs.WithFile("heap.prof", "", fs.MatchFileContent(validProfileData))),
// Ignore the extra directories, they should be the same as the first two
fs.MatchExtraFiles))
assert.Assert(t, fs.Equal(testDir, expected))
metricsFiles, err := filepath.Glob(fmt.Sprintf("%s/*/%s", outputPath, "metrics.json"))
require.NoError(t, err)
require.Len(t, metricsFiles, 1)
require.Equal(t, "", ui.ErrorWriter.String(), "expected no error output")
}
func validLogFile(raw []byte) fs.CompareResult {
scanner := bufio.NewScanner(bytes.NewReader(raw))
for scanner.Scan() {
logLine := scanner.Text()
if !validateLogLine([]byte(logLine)) {
return cmp.ResultFailure(fmt.Sprintf("log line is not valid %s", logLine))
}
}
if scanner.Err() != nil {
return cmp.ResultFailure(scanner.Err().Error())
}
return cmp.ResultSuccess
}
func validIndexJSON(raw []byte) fs.CompareResult {
var target debugIndex
decoder := json.NewDecoder(bytes.NewReader(raw))
decoder.DisallowUnknownFields()
if err := decoder.Decode(&target); err != nil {
return cmp.ResultFailure(err.Error())
}
return cmp.ResultSuccess
}
func validJSON(raw []byte) fs.CompareResult {
var target interface{}
decoder := json.NewDecoder(bytes.NewReader(raw))
if err := decoder.Decode(&target); err != nil {
return cmp.ResultFailure(err.Error())
}
return cmp.ResultSuccess
}
func validProfileData(raw []byte) fs.CompareResult {
if _, err := profile.ParseData(raw); err != nil {
return cmp.ResultFailure(err.Error())
}
return cmp.ResultSuccess
}
type incrementalTime struct {
base time.Time
next uint64
}
func (t *incrementalTime) Now() time.Time {
t.next++
return t.base.Add(time.Duration(t.next) * time.Second)
}
func TestDebugCommand_Archive(t *testing.T) {
@ -267,11 +332,11 @@ func TestDebugCommand_CaptureTargets(t *testing.T) {
"static": {
[]string{"agent", "host", "cluster"},
[]string{"agent.json", "host.json", "members.json"},
[]string{"*/metrics.json"},
[]string{"metrics.json"},
},
"metrics-only": {
[]string{"metrics"},
[]string{"*/metrics.json"},
[]string{"metrics.json"},
[]string{"agent.json", "host.json", "members.json"},
},
"all-but-pprof": {
@ -286,8 +351,8 @@ func TestDebugCommand_CaptureTargets(t *testing.T) {
"host.json",
"agent.json",
"members.json",
"*/metrics.json",
"*/consul.log",
"metrics.json",
"consul.log",
},
[]string{},
},
@ -356,100 +421,6 @@ func TestDebugCommand_CaptureTargets(t *testing.T) {
}
}
func TestDebugCommand_CaptureLogs(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")
}
cases := map[string]struct {
// used in -target param
targets []string
// existence verified after execution
files []string
// non-existence verified after execution
excludedFiles []string
}{
"logs-only": {
[]string{"logs"},
[]string{"*/consul.log"},
[]string{"agent.json", "host.json", "cluster.json", "*/metrics.json"},
},
}
for name, tc := range cases {
testDir := testutil.TempDir(t, "debug")
a := agent.NewTestAgent(t, `
enable_debug = true
`)
defer a.Shutdown()
testrpc.WaitForLeader(t, a.RPC, "dc1")
ui := cli.NewMockUi()
cmd := New(ui)
cmd.validateTiming = false
outputPath := fmt.Sprintf("%s/debug-%s", testDir, name)
args := []string{
"-http-addr=" + a.HTTPAddr(),
"-output=" + outputPath,
"-archive=false",
"-duration=1000ms",
"-interval=50ms",
}
for _, t := range tc.targets {
args = append(args, "-capture="+t)
}
if code := cmd.Run(args); code != 0 {
t.Fatalf("should exit 0, got code: %d", code)
}
errOutput := ui.ErrorWriter.String()
if errOutput != "" {
t.Errorf("expected no error output, got %q", errOutput)
}
// Ensure the debug data was written
_, err := os.Stat(outputPath)
if err != nil {
t.Fatalf("output path should exist: %s", err)
}
// Ensure the captured static files exist
for _, f := range tc.files {
path := fmt.Sprintf("%s/%s", outputPath, f)
// Glob ignores file system errors
fs, _ := filepath.Glob(path)
if len(fs) <= 0 {
t.Fatalf("%s: output data should exist for %s", name, f)
}
for _, logFile := range fs {
content, err := ioutil.ReadFile(logFile)
require.NoError(t, err)
scanner := bufio.NewScanner(strings.NewReader(string(content)))
for scanner.Scan() {
logLine := scanner.Text()
if !validateLogLine([]byte(logLine)) {
t.Fatalf("%s: log line is not valid %s", name, logLine)
}
}
}
}
// Ensure any excluded files do not exist
for _, f := range tc.excludedFiles {
path := fmt.Sprintf("%s/%s", outputPath, f)
// Glob ignores file system errors
fs, _ := filepath.Glob(path)
if len(fs) > 0 {
t.Fatalf("%s: output data should not exist for %s", name, f)
}
}
}
}
func validateLogLine(content []byte) bool {
fields := strings.SplitN(string(content), " ", 2)
if len(fields) != 2 {
@ -466,62 +437,6 @@ func validateLogLine(content []byte) bool {
return valid
}
func TestDebugCommand_ProfilesExist(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")
}
testDir := testutil.TempDir(t, "debug")
a := agent.NewTestAgent(t, `
enable_debug = true
`)
defer a.Shutdown()
testrpc.WaitForLeader(t, a.RPC, "dc1")
ui := cli.NewMockUi()
cmd := New(ui)
cmd.validateTiming = false
outputPath := fmt.Sprintf("%s/debug", testDir)
println(outputPath)
args := []string{
"-http-addr=" + a.HTTPAddr(),
"-output=" + outputPath,
// CPU profile has a minimum of 1s
"-archive=false",
"-duration=2s",
"-interval=1s",
"-capture=pprof",
}
if code := cmd.Run(args); code != 0 {
t.Fatalf("should exit 0, got code: %d", code)
}
profiles := []string{"heap.prof", "profile.prof", "goroutine.prof", "trace.out"}
// Glob ignores file system errors
for _, v := range profiles {
fs, _ := filepath.Glob(fmt.Sprintf("%s/*/%s", outputPath, v))
if len(fs) < 1 {
t.Errorf("output data should exist for %s", v)
}
for _, f := range fs {
if !strings.Contains(f, "trace.out") {
content, err := ioutil.ReadFile(f)
require.NoError(t, err)
_, err = profile.ParseData(content)
require.NoError(t, err)
}
}
}
errOutput := ui.ErrorWriter.String()
if errOutput != "" {
t.Errorf("expected no error output, got %s", errOutput)
}
}
func TestDebugCommand_Prepare_ValidateTiming(t *testing.T) {
cases := map[string]struct {
duration string