Merge pull request #12359 from hashicorp/dnephin/fix-debug-size

debug: limit the size of the trace
This commit is contained in:
Daniel Nephin 2022-02-15 18:33:46 -05:00 committed by GitHub
commit 8110ecc93b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 9 additions and 10 deletions

3
.changelog/12359.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
debug: reduce the capture time for trace to only a single interval instead of the full duration to make trace.out easier to open without running into OOM errors.
```

View File

@ -400,20 +400,16 @@ func makeIntervalDir(base string, now time.Time) (string, error) {
func (c *cmd) captureLongRunning(ctx context.Context) error { func (c *cmd) captureLongRunning(ctx context.Context) error {
g := new(errgroup.Group) g := new(errgroup.Group)
// Capture a profile/trace with a minimum of 1s
s := c.duration.Seconds()
if s < 1 {
s = 1
}
if c.captureTarget(targetProfiles) { if c.captureTarget(targetProfiles) {
g.Go(func() error { g.Go(func() error {
// use ctx without a timeout to allow the profile to finish sending // use ctx without a timeout to allow the profile to finish sending
return c.captureProfile(ctx, s) return c.captureProfile(ctx, c.duration.Seconds())
}) })
g.Go(func() error { g.Go(func() error {
// use ctx without a timeout to allow the trace to finish sending // use ctx without a timeout to allow the trace to finish sending
return c.captureTrace(ctx, s) return c.captureTrace(ctx, int(c.interval.Seconds()))
}) })
} }
if c.captureTarget(targetLogs) { if c.captureTarget(targetLogs) {
@ -443,8 +439,8 @@ func (c *cmd) captureGoRoutines(outputDir string) error {
return ioutil.WriteFile(filepath.Join(outputDir, "goroutine.prof"), gr, 0644) return ioutil.WriteFile(filepath.Join(outputDir, "goroutine.prof"), gr, 0644)
} }
func (c *cmd) captureTrace(ctx context.Context, s float64) error { func (c *cmd) captureTrace(ctx context.Context, duration int) error {
prof, err := c.client.Debug().PProf(ctx, "trace", int(s)) prof, err := c.client.Debug().PProf(ctx, "trace", duration)
if err != nil { if err != nil {
return fmt.Errorf("failed to collect cpu profile: %w", err) return fmt.Errorf("failed to collect cpu profile: %w", err)
} }

View File

@ -78,7 +78,7 @@ information when `debug` is running. By default, it captures all information.
| `members` | A list of all the WAN and LAN members in the cluster. | | `members` | A list of all the WAN and LAN members in the cluster. |
| `metrics` | Metrics from the in-memory metrics endpoint in the target, captured at the interval. | | `metrics` | Metrics from the in-memory metrics endpoint in the target, captured at the interval. |
| `logs` | `DEBUG` level logs for the target agent, captured for the duration. | | `logs` | `DEBUG` level logs for the target agent, captured for the duration. |
| `pprof` | Golang heap, CPU, goroutine, and trace profiling. CPU and traces are captured for `duration` in a single file while heap and goroutine are separate snapshots for each `interval`. This information is not retrieved unless [`enable_debug`](/docs/agent/options#enable_debug) is set to `true` on the target agent or ACLs are enabled and an ACL token with `operator:read` is provided. | | `pprof` | Golang heap, CPU, goroutine, and trace profiling. CPU profile is captured for `duration` in a single file, trace is captured for a single `interval`, while heap and goroutine are separate snapshots for each `interval`. This information is not retrieved unless [`enable_debug`](/docs/agent/options#enable_debug) is set to `true` on the target agent or ACLs are enabled and an ACL token with `operator:read` is provided. |
## Examples ## Examples