2024-11-06 17:37:08 +00:00

452 lines
14 KiB
Go

package sentry
import (
"container/ring"
"strconv"
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/getsentry/sentry-go/internal/traceparser"
)
// Start a profiler that collects samples continuously, with a buffer of up to 30 seconds.
// Later, you can collect a slice from this buffer, producing a Trace.
func startProfiling(startTime time.Time) profiler {
onProfilerStart()
p := newProfiler(startTime)
// Wait for the profiler to finish setting up before returning to the caller.
started := make(chan struct{})
go p.run(started)
if _, ok := <-started; ok {
return p
}
return nil
}
type profiler interface {
// GetSlice returns a slice of the profiled data between the given times.
GetSlice(startTime, endTime time.Time) *profilerResult
Stop(wait bool)
}
type profilerResult struct {
callerGoID uint64
trace *profileTrace
}
func getCurrentGoID() uint64 {
// We shouldn't panic but let's be super safe.
defer func() {
if err := recover(); err != nil {
Logger.Printf("Profiler panic in getCurrentGoID(): %v\n", err)
}
}()
// Buffer to read the stack trace into. We should be good with a small buffer because we only need the first line.
var stacksBuffer = make([]byte, 100)
var n = runtime.Stack(stacksBuffer, false)
if n > 0 {
var traces = traceparser.Parse(stacksBuffer[0:n])
if traces.Length() > 0 {
var trace = traces.Item(0)
return trace.GoID()
}
}
return 0
}
const profilerSamplingRateHz = 101 // 101 Hz; not 100 Hz because of the lockstep sampling (https://stackoverflow.com/a/45471031/1181370)
const profilerSamplingRate = time.Second / profilerSamplingRateHz
const stackBufferMaxGrowth = 512 * 1024
const stackBufferLimit = 10 * 1024 * 1024
const profilerRuntimeLimit = 30 // seconds
type profileRecorder struct {
startTime time.Time
stopSignal chan struct{}
stopped int64
mutex sync.RWMutex
testProfilerPanic int64
// Map from runtime.StackRecord.Stack0 to an index in stacks.
stackIndexes map[string]int
stacks []profileStack
newStacks []profileStack // New stacks created in the current interation.
stackKeyBuffer []byte
// Map from runtime.Frame.PC to an index in frames.
frameIndexes map[string]int
frames []*Frame
newFrames []*Frame // New frames created in the current interation.
// We keep a ring buffer of 30 seconds worth of samples, so that we can later slice it.
// Each bucket is a slice of samples all taken at the same time.
samplesBucketsHead *ring.Ring
// Buffer to read current stacks - will grow automatically up to stackBufferLimit.
stacksBuffer []byte
}
func newProfiler(startTime time.Time) *profileRecorder {
// Pre-allocate the profile trace for the currently active number of routines & 100 ms worth of samples.
// Other coefficients are just guesses of what might be a good starting point to avoid allocs on short runs.
return &profileRecorder{
startTime: startTime,
stopSignal: make(chan struct{}, 1),
stackIndexes: make(map[string]int, 32),
stacks: make([]profileStack, 0, 32),
newStacks: make([]profileStack, 0, 32),
frameIndexes: make(map[string]int, 128),
frames: make([]*Frame, 0, 128),
newFrames: make([]*Frame, 0, 128),
samplesBucketsHead: ring.New(profilerRuntimeLimit * profilerSamplingRateHz),
// A buffer of 2 KiB per goroutine stack looks like a good starting point (empirically determined).
stacksBuffer: make([]byte, runtime.NumGoroutine()*2048),
}
}
// This allows us to test whether panic during profiling are handled correctly and don't block execution.
// If the number is lower than 0, profilerGoroutine() will panic immedately.
// If the number is higher than 0, profiler.onTick() will panic when the given samples-set index is being collected.
var testProfilerPanic int64
var profilerRunning int64
func (p *profileRecorder) run(started chan struct{}) {
// Code backup for manual test debugging:
// if !atomic.CompareAndSwapInt64(&profilerRunning, 0, 1) {
// panic("Only one profiler can be running at a time")
// }
// We shouldn't panic but let's be super safe.
defer func() {
if err := recover(); err != nil {
Logger.Printf("Profiler panic in run(): %v\n", err)
}
atomic.StoreInt64(&testProfilerPanic, 0)
close(started)
p.stopSignal <- struct{}{}
atomic.StoreInt64(&p.stopped, 1)
atomic.StoreInt64(&profilerRunning, 0)
}()
p.testProfilerPanic = atomic.LoadInt64(&testProfilerPanic)
if p.testProfilerPanic < 0 {
Logger.Printf("Profiler panicking during startup because testProfilerPanic == %v\n", p.testProfilerPanic)
panic("This is an expected panic in profilerGoroutine() during tests")
}
// Collect the first sample immediately.
p.onTick()
// Periodically collect stacks, starting after profilerSamplingRate has passed.
collectTicker := profilerTickerFactory(profilerSamplingRate)
defer collectTicker.Stop()
var tickerChannel = collectTicker.TickSource()
started <- struct{}{}
for {
select {
case <-tickerChannel:
p.onTick()
collectTicker.Ticked()
case <-p.stopSignal:
return
}
}
}
func (p *profileRecorder) Stop(wait bool) {
if atomic.LoadInt64(&p.stopped) == 1 {
return
}
p.stopSignal <- struct{}{}
if wait {
<-p.stopSignal
}
}
func (p *profileRecorder) GetSlice(startTime, endTime time.Time) *profilerResult {
// Unlikely edge cases - profiler wasn't running at all or the given times are invalid in relation to each other.
if p.startTime.After(endTime) || startTime.After(endTime) {
return nil
}
var relativeStartNS = uint64(0)
if p.startTime.Before(startTime) {
relativeStartNS = uint64(startTime.Sub(p.startTime).Nanoseconds())
}
var relativeEndNS = uint64(endTime.Sub(p.startTime).Nanoseconds())
samplesCount, bucketsReversed, trace := p.getBuckets(relativeStartNS, relativeEndNS)
if samplesCount == 0 {
return nil
}
var result = &profilerResult{
callerGoID: getCurrentGoID(),
trace: trace,
}
trace.Samples = make([]profileSample, samplesCount)
trace.ThreadMetadata = make(map[uint64]*profileThreadMetadata, len(bucketsReversed[0].goIDs))
var s = samplesCount - 1
for _, bucket := range bucketsReversed {
var elapsedSinceStartNS = bucket.relativeTimeNS - relativeStartNS
for i, goID := range bucket.goIDs {
trace.Samples[s].ElapsedSinceStartNS = elapsedSinceStartNS
trace.Samples[s].ThreadID = goID
trace.Samples[s].StackID = bucket.stackIDs[i]
s--
if _, goroutineExists := trace.ThreadMetadata[goID]; !goroutineExists {
trace.ThreadMetadata[goID] = &profileThreadMetadata{
Name: "Goroutine " + strconv.FormatUint(goID, 10),
}
}
}
}
return result
}
// Collect all buckets of samples in the given time range while holding a read lock.
func (p *profileRecorder) getBuckets(relativeStartNS, relativeEndNS uint64) (samplesCount int, buckets []*profileSamplesBucket, trace *profileTrace) {
p.mutex.RLock()
defer p.mutex.RUnlock()
// sampleBucketsHead points at the last stored bucket so it's a good starting point to search backwards for the end.
var end = p.samplesBucketsHead
for end.Value != nil && end.Value.(*profileSamplesBucket).relativeTimeNS > relativeEndNS {
end = end.Prev()
}
// Edge case - no items stored before the given endTime.
if end.Value == nil {
return 0, nil, nil
}
{ // Find the first item after the given startTime.
var start = end
var prevBucket *profileSamplesBucket
samplesCount = 0
buckets = make([]*profileSamplesBucket, 0, int64((relativeEndNS-relativeStartNS)/uint64(profilerSamplingRate.Nanoseconds()))+1)
for start.Value != nil {
var bucket = start.Value.(*profileSamplesBucket)
// If this bucket's time is before the requests start time, don't collect it (and stop iterating further).
if bucket.relativeTimeNS < relativeStartNS {
break
}
// If this bucket time is greater than previous the bucket's time, we have exhausted the whole ring buffer
// before we were able to find the start time. That means the start time is not present and we must break.
// This happens if the slice duration exceeds the ring buffer capacity.
if prevBucket != nil && bucket.relativeTimeNS > prevBucket.relativeTimeNS {
break
}
samplesCount += len(bucket.goIDs)
buckets = append(buckets, bucket)
start = start.Prev()
prevBucket = bucket
}
}
// Edge case - if the period requested was too short and we haven't collected enough samples.
if len(buckets) < 2 {
return 0, nil, nil
}
trace = &profileTrace{
Frames: p.frames,
Stacks: p.stacks,
}
return samplesCount, buckets, trace
}
func (p *profileRecorder) onTick() {
elapsedNs := time.Since(p.startTime).Nanoseconds()
if p.testProfilerPanic > 0 {
Logger.Printf("Profiler testProfilerPanic == %v\n", p.testProfilerPanic)
if p.testProfilerPanic == 1 {
Logger.Println("Profiler panicking onTick()")
panic("This is an expected panic in Profiler.OnTick() during tests")
}
p.testProfilerPanic--
}
records := p.collectRecords()
p.processRecords(uint64(elapsedNs), records)
// Free up some memory if we don't need such a large buffer anymore.
if len(p.stacksBuffer) > len(records)*3 {
p.stacksBuffer = make([]byte, len(records)*3)
}
}
func (p *profileRecorder) collectRecords() []byte {
for {
// Capture stacks for all existing goroutines.
// Note: runtime.GoroutineProfile() would be better but we can't use it at the moment because
// it doesn't give us `gid` for each routine, see https://github.com/golang/go/issues/59663
n := runtime.Stack(p.stacksBuffer, true)
// If we couldn't read everything, increase the buffer and try again.
if n >= len(p.stacksBuffer) && n < stackBufferLimit {
var newSize = n * 2
if newSize > n+stackBufferMaxGrowth {
newSize = n + stackBufferMaxGrowth
}
if newSize > stackBufferLimit {
newSize = stackBufferLimit
}
p.stacksBuffer = make([]byte, newSize)
} else {
return p.stacksBuffer[0:n]
}
}
}
func (p *profileRecorder) processRecords(elapsedNs uint64, stacksBuffer []byte) {
var traces = traceparser.Parse(stacksBuffer)
var length = traces.Length()
// Shouldn't happen but let's be safe and don't store empty buckets.
if length == 0 {
return
}
var bucket = &profileSamplesBucket{
relativeTimeNS: elapsedNs,
stackIDs: make([]int, length),
goIDs: make([]uint64, length),
}
// reset buffers
p.newFrames = p.newFrames[:0]
p.newStacks = p.newStacks[:0]
for i := 0; i < length; i++ {
var stack = traces.Item(i)
bucket.stackIDs[i] = p.addStackTrace(stack)
bucket.goIDs[i] = stack.GoID()
}
p.mutex.Lock()
defer p.mutex.Unlock()
p.stacks = append(p.stacks, p.newStacks...)
p.frames = append(p.frames, p.newFrames...)
p.samplesBucketsHead = p.samplesBucketsHead.Next()
p.samplesBucketsHead.Value = bucket
}
func (p *profileRecorder) addStackTrace(capturedStack traceparser.Trace) int {
iter := capturedStack.Frames()
stack := make(profileStack, 0, iter.LengthUpperBound())
// Originally, we've used `capturedStack.UniqueIdentifier()` as a key but that was incorrect because it also
// contains function arguments and we want to group stacks by function name and file/line only.
// Instead, we need to parse frames and we use a list of their indexes as a key.
// We reuse the same buffer for each stack to avoid allocations; this is a hot spot.
var expectedBufferLen = cap(stack) * 5 // 4 bytes per frame + 1 byte for space
if cap(p.stackKeyBuffer) < expectedBufferLen {
p.stackKeyBuffer = make([]byte, 0, expectedBufferLen)
} else {
p.stackKeyBuffer = p.stackKeyBuffer[:0]
}
for iter.HasNext() {
var frame = iter.Next()
if frameIndex := p.addFrame(frame); frameIndex >= 0 {
stack = append(stack, frameIndex)
p.stackKeyBuffer = append(p.stackKeyBuffer, 0) // space
// The following code is just like binary.AppendUvarint() which isn't yet available in Go 1.18.
x := uint64(frameIndex) + 1
for x >= 0x80 {
p.stackKeyBuffer = append(p.stackKeyBuffer, byte(x)|0x80)
x >>= 7
}
p.stackKeyBuffer = append(p.stackKeyBuffer, byte(x))
}
}
stackIndex, exists := p.stackIndexes[string(p.stackKeyBuffer)]
if !exists {
stackIndex = len(p.stacks) + len(p.newStacks)
p.newStacks = append(p.newStacks, stack)
p.stackIndexes[string(p.stackKeyBuffer)] = stackIndex
}
return stackIndex
}
func (p *profileRecorder) addFrame(capturedFrame traceparser.Frame) int {
// NOTE: Don't convert to string yet, it's expensive and compiler can avoid it when
// indexing into a map (only needs a copy when adding a new key to the map).
var key = capturedFrame.UniqueIdentifier()
frameIndex, exists := p.frameIndexes[string(key)]
if !exists {
module, function := splitQualifiedFunctionName(string(capturedFrame.Func()))
file, line := capturedFrame.File()
frame := newFrame(module, function, string(file), line)
frameIndex = len(p.frames) + len(p.newFrames)
p.newFrames = append(p.newFrames, &frame)
p.frameIndexes[string(key)] = frameIndex
}
return frameIndex
}
type profileSamplesBucket struct {
relativeTimeNS uint64
stackIDs []int
goIDs []uint64
}
// A Ticker holds a channel that delivers “ticks” of a clock at intervals.
type profilerTicker interface {
// Stop turns off a ticker. After Stop, no more ticks will be sent.
Stop()
// TickSource returns a read-only channel of ticks.
TickSource() <-chan time.Time
// Ticked is called by the Profiler after a tick is processed to notify the ticker. Used for testing.
Ticked()
}
type timeTicker struct {
*time.Ticker
}
func (t *timeTicker) TickSource() <-chan time.Time {
return t.C
}
func (t *timeTicker) Ticked() {}
func profilerTickerFactoryDefault(d time.Duration) profilerTicker {
return &timeTicker{time.NewTicker(d)}
}
// We allow overriding the ticker for tests. CI is terribly flaky
// because the time.Ticker doesn't guarantee regular ticks - they may come (a lot) later than the given interval.
var profilerTickerFactory = profilerTickerFactoryDefault