diff --git a/.changelog/12359.txt b/.changelog/12359.txt new file mode 100644 index 0000000000..6c6c3e4511 --- /dev/null +++ b/.changelog/12359.txt @@ -0,0 +1,3 @@ +```release-note:improvement +debug: reduce the capture time for trace to only a single interval instead of the full duration to make trace.out easier to open without running into OOM errors. +``` diff --git a/command/debug/debug.go b/command/debug/debug.go index 1749d76b29..587462a4b7 100644 --- a/command/debug/debug.go +++ b/command/debug/debug.go @@ -400,20 +400,16 @@ func makeIntervalDir(base string, now time.Time) (string, error) { func (c *cmd) captureLongRunning(ctx context.Context) error { g := new(errgroup.Group) - // Capture a profile/trace with a minimum of 1s - s := c.duration.Seconds() - if s < 1 { - s = 1 - } + if c.captureTarget(targetProfiles) { g.Go(func() error { // use ctx without a timeout to allow the profile to finish sending - return c.captureProfile(ctx, s) + return c.captureProfile(ctx, c.duration.Seconds()) }) g.Go(func() error { // use ctx without a timeout to allow the trace to finish sending - return c.captureTrace(ctx, s) + return c.captureTrace(ctx, int(c.interval.Seconds())) }) } if c.captureTarget(targetLogs) { @@ -443,8 +439,8 @@ func (c *cmd) captureGoRoutines(outputDir string) error { return ioutil.WriteFile(filepath.Join(outputDir, "goroutine.prof"), gr, 0644) } -func (c *cmd) captureTrace(ctx context.Context, s float64) error { - prof, err := c.client.Debug().PProf(ctx, "trace", int(s)) +func (c *cmd) captureTrace(ctx context.Context, duration int) error { + prof, err := c.client.Debug().PProf(ctx, "trace", duration) if err != nil { return fmt.Errorf("failed to collect cpu profile: %w", err) } diff --git a/website/content/commands/debug.mdx b/website/content/commands/debug.mdx index ded40e58e6..58434cb16a 100644 --- a/website/content/commands/debug.mdx +++ b/website/content/commands/debug.mdx @@ -78,7 +78,7 @@ information when `debug` is running. 
By default, it captures all information. | `members` | A list of all the WAN and LAN members in the cluster. | | `metrics` | Metrics from the in-memory metrics endpoint in the target, captured at the interval. | | `logs` | `DEBUG` level logs for the target agent, captured for the duration. | -| `pprof` | Golang heap, CPU, goroutine, and trace profiling. CPU and traces are captured for `duration` in a single file while heap and goroutine are separate snapshots for each `interval`. This information is not retrieved unless [`enable_debug`](/docs/agent/options#enable_debug) is set to `true` on the target agent or ACLs are enable and an ACL token with `operator:read` is provided. | +| `pprof` | Golang heap, CPU, goroutine, and trace profiling. The CPU profile is captured for `duration` in a single file, the trace is captured for a single `interval`, and heap and goroutine are separate snapshots for each `interval`. This information is not retrieved unless [`enable_debug`](/docs/agent/options#enable_debug) is set to `true` on the target agent or ACLs are enabled and an ACL token with `operator:read` is provided. | ## Examples