import std/algorithm
import std/enumerate
import std/sequtils
import std/times

import asyncprofiler
import metrics

when defined(metrics):
  type
    AsyncProfilerInfo* = ref object of RootObj
      ## Periodically samples per-proc profiler metrics and publishes them
      ## as gauges.
      perfSampler: PerfSampler ## Callback producing the current summary.
      sampleInterval: times.Duration ## Minimum time between unforced
                                     ## collections.
      clock: Clock ## Callback producing the current time.
      k: int ## Maximum number of procs published per collection.
      init: bool ## Set to `true` by `newCollector`.
      lastSample: Time ## Clock reading taken by the last completed
                       ## collection.
      collections*: uint ## Number of collections performed so far.

    PerfSampler = proc (): MetricsSummary {.raises: [].}

    Clock = proc (): Time {.raises: [].}

    ProfilerMetric = (SrcLoc, OverallMetrics)

  const locationLabels = ["proc", "file", "line"]

  # Per-proc Metrics
  declarePublicGauge(
    chronos_exec_time_total,
    "total time in which this proc actively occupied the event loop thread",
    labels = locationLabels,
  )

  # NOTE: the help text is split over two literals; the trailing space on the
  # first literal is required, otherwise the words get glued together.
  declarePublicGauge(
    chronos_run_time_total,
    "chronos_exec_time_total of this proc plus of all its children (procs " &
      "that this proc called and awaited for)",
    labels = locationLabels,
  )

  declarePublicGauge(
    chronos_wall_time_total,
    "the amount of time elapsed from when the async proc was started to when " &
      "it completed",
    labels = locationLabels,
  )

  declarePublicGauge(
    chronos_call_count_total,
    "the total number of times this async proc was called and completed",
    labels = locationLabels,
  )

  # Per-proc Statistics
  declarePublicGauge(
    chronos_single_exec_time_max,
    "the maximum execution time for a single call of this proc",
    labels = locationLabels,
  )

  # Global Statistics
  declarePublicGauge(
    chronos_largest_exec_time_total,
    "the largest chronos_exec_time_total of all procs",
  )

  declarePublicGauge(
    chronos_largest_exec_time_max,
    "the largest chronos_single_exec_time_max of all procs",
  )

  # Keeps track of the thread initializing the module. This is the only thread
  # that will be allowed to interact with the metrics collector.
  let moduleInitThread = getThreadId()

  proc newCollector*(
    AsyncProfilerInfo: typedesc,
    perfSampler: PerfSampler,
    clock: Clock,
    sampleInterval: times.Duration,
    k: int = 10,
  ): AsyncProfilerInfo =
    ## Creates a collector that reads metrics from `perfSampler`, reads the
    ## time from `clock`, samples at most once per `sampleInterval` (unless
    ## forced) and publishes per-proc gauges for at most `k` procs.
    ##
    ## `lastSample` starts at `low(Time)`.
    AsyncProfilerInfo(
      perfSampler: perfSampler,
      clock: clock,
      k: k,
      sampleInterval: sampleInterval,
      init: true,
      lastSample: low(Time),
    )

  proc collectSlowestProcs(
    self: AsyncProfilerInfo,
    profilerMetrics: seq[ProfilerMetric],
    timestampMillis: int64,
    k: int,
  ): void =
    ## Publishes the per-proc gauges for the first `k` entries of
    ## `profilerMetrics`, labelled with each entry's proc name, file and line.
    ## `collect` passes the list sorted by descending total exec time, so
    ## these are the `k` procs with the largest total exec time.
    ##
    ## `self` and `timestampMillis` are currently unused.
    for (i, pair) in enumerate(profilerMetrics):
      if i == k:
        break

      let (location, procMetrics) = pair

      # Order must match the `locationLabels` used when declaring the gauges.
      let labelValues = @[
        $(location.procedure),
        $(location.file),
        $(location.line),
      ]

      chronos_exec_time_total.set(
        procMetrics.totalExecTime.nanoseconds, labelValues = labelValues)

      chronos_run_time_total.set(
        procMetrics.totalRunTime.nanoseconds, labelValues = labelValues)

      chronos_wall_time_total.set(
        procMetrics.totalWallTime.nanoseconds, labelValues = labelValues)

      chronos_single_exec_time_max.set(
        procMetrics.maxSingleTime.nanoseconds, labelValues = labelValues)

      chronos_call_count_total.set(
        procMetrics.count, labelValues = labelValues)

  proc collectOutlierMetrics(
    self: AsyncProfilerInfo,
    profilerMetrics: seq[ProfilerMetric],
    timestampMillis: int64,
  ): void =
    ## Adds summary metrics for the procs that have the highest exec time
    ## (which stops the async loop) and the highest max exec time. This can
    ## help spot outliers.
    ##
    ## If `profilerMetrics` is empty, both gauges are set from
    ## `low(timer.Duration)`. `self` and `timestampMillis` are currently
    ## unused.
    var largestExecTime = low(timer.Duration)
    var largestMaxExecTime = low(timer.Duration)

    for (_, metric) in profilerMetrics:
      if metric.maxSingleTime > largestMaxExecTime:
        largestMaxExecTime = metric.maxSingleTime
      if metric.totalExecTime > largestExecTime:
        largestExecTime = metric.totalExecTime

    chronos_largest_exec_time_total.set(largestExecTime.nanoseconds)
    chronos_largest_exec_time_max.set(largestMaxExecTime.nanoseconds)

  proc collect*(self: AsyncProfilerInfo, force: bool = false): void =
    ## Samples the profiler and refreshes the published gauges.
    ##
    ## Returns without doing anything when less than `sampleInterval` has
    ## elapsed since the last collection, unless `force` is `true`. Must be
    ## called from the thread that initialized this module.

    # Calling this method from the wrong thread has happened a lot in the past,
    # so this makes sure we're not doing anything funny.
    assert getThreadId() == moduleInitThread, "You cannot call collect() from" &
      " a thread other than the one that initialized the metricscolletor module"

    let now = self.clock()
    if not force and (now - self.lastSample < self.sampleInterval):
      return

    self.collections += 1

    let currentMetrics = self.
      perfSampler().
      pairs.
      toSeq.
      map(
        proc (pair: (ptr SrcLoc, OverallMetrics)): ProfilerMetric =
          (pair[0][], pair[1])
      ).
      # We don't scoop metrics with 0 exec time as we have a limited number of
      # prometheus slots, and those are less likely to be useful in debugging
      # Chronos performance issues.
      filter(
        proc (pair: ProfilerMetric): bool =
          pair[1].totalExecTime.nanoseconds > 0
      ).
      sorted(
        proc (a, b: ProfilerMetric): int =
          cmp(a[1].totalExecTime, b[1].totalExecTime),
        order = SortOrder.Descending
      )

    let timestampMillis = now.toMilliseconds()

    self.collectOutlierMetrics(currentMetrics, timestampMillis)
    self.collectSlowestProcs(currentMetrics, timestampMillis, self.k)

    self.lastSample = now

  proc resetMetric(gauge: Gauge): void =
    ## Sets every label combination currently present in `gauge` to zero.

    # We try to be as conservative as possible and not write directly to
    # internal state. We do need to read from it, though.
    for labelValues in gauge.metrics.keys:
      gauge.set(0.int64, labelValues = labelValues)

  proc reset*(self: AsyncProfilerInfo): void =
    ## Zeroes all gauges declared by this module. `self` is not modified.
    resetMetric(chronos_exec_time_total)
    resetMetric(chronos_run_time_total)
    resetMetric(chronos_wall_time_total)
    resetMetric(chronos_call_count_total)
    resetMetric(chronos_single_exec_time_max)
    resetMetric(chronos_largest_exec_time_total)
    resetMetric(chronos_largest_exec_time_max)

  # Module-wide collector instance; assigned by `initDefault`.
  var asyncProfilerInfo* {.global.}: AsyncProfilerInfo

  proc initDefault*(AsyncProfilerInfo: typedesc, k: int) =
    ## Creates the module-wide `asyncProfilerInfo` collector, publishing at
    ## most `k` procs per collection, and registers a profiler change callback
    ## that calls `collect()` on it. Must be called from the thread that
    ## initialized this module.
    assert getThreadId() == moduleInitThread, "You cannot call " &
      "initDefault() from a thread other than the one that initialized the " &
      "metricscolletor module."

    asyncProfilerInfo = AsyncProfilerInfo.newCollector(
      perfSampler = proc (): MetricsSummary = profiler.getFutureSummaryMetrics(),
      k = k,
      # We want to collect metrics every 5 seconds.
      sampleInterval = initDuration(seconds = 5),
      clock = proc (): Time = getTime(),
    )

    profiler.setChangeCallback(proc (): void = asyncProfilerInfo.collect())