# nim-codex/codex/utils/asyncprofiler/metricscollector.nim

import std/algorithm
import std/enumerate
import std/sequtils
import std/times

import asyncprofiler
import metrics

when defined(metrics):
  type
    # Callback invoked each time `collect` runs to obtain the profiler's
    # current `MetricsSummary`. Declared as non-raising.
    PerfSampler = proc (): MetricsSummary {.raises: [].}

    # One profiler entry: a source location paired with the metrics
    # accumulated for it.
    ProfilerMetric = (SrcLoc, OverallMetrics)

    ProfilingCollector* = ref object of Gauge
      ## Collector that converts profiler samples into labeled metrics.
      perfSampler: PerfSampler ## supplies the samples read by `collect`
      k: int ## limit on how many procs `collect` reports individually

  proc newCollector*(
    ProfilingCollector: typedesc,
    name: string,
    help: string,
    perfSampler: PerfSampler,
    k: int = 10,
    registry: Registry = defaultRegistry,
  ): ProfilingCollector =
    ## Creates a collector named `name` (described by `help`) in `registry`
    ## and equips it with the sampler callback `perfSampler` and the limit
    ## `k` on individually reported procs.

    # NOTE(review): the inner call leaves out `perfSampler`, which this
    # overload requires, so it cannot resolve back to this proc; presumably
    # it picks the generic `newCollector` from `metrics` -- confirm.
    let collector = ProfilingCollector.newCollector(
      name = name,
      help = help,
      registry = registry)

    collector.perfSampler = perfSampler
    collector.k = k
    collector

  proc metricValue(duration: timer.Duration): float64 =
    ## Converts `duration` into the value that gets exported: its nanosecond
    ## count as a `float64`.
    float64(duration.nanoseconds)

  proc collectSlowestProcs(
    self: ProfilingCollector,
    profilerMetrics: seq[ProfilerMetric],
    prometheusMetrics: var Metrics,
    timestampMillis: int64,
    k: int,
  ): void =
    ## Emits four timing metrics (total exec, run and wall time, plus the
    ## longest single execution) for each of the first `k` entries of
    ## `profilerMetrics`, labeled with the proc's name, file and line.
    ##
    ## Entries are consumed in the order given, so the caller decides what
    ## "slowest" means through the way it sorts `profilerMetrics`. Every
    ## sample is stamped with `timestampMillis`, matching the unlabeled
    ## summary metrics. Samples are stored in `prometheusMetrics` under the
    ## entry keyed by the proc's label values.

    const locationLabelsKeys = @["proc", "file", "line"]

    for (i, pair) in enumerate(profilerMetrics):
      # The index counts up from zero, so a negative `k` never matches and
      # every entry is reported.
      if i == k:
        break

      let (location, overall) = pair

      # Built once per proc: used both as the table key and as the label
      # values of every sample emitted for that proc.
      let labelValues = @[
        $(location.procedure),
        $(location.file),
        $(location.line),
      ]

      let samples = [
        ("total_exec_time", overall.totalExecTime),
        ("total_run_time", overall.totalRunTime),
        ("total_wall_time", overall.totalWallTime),
        ("max_single_exec_time", overall.maxSingleTime),
      ]

      for (name, value) in samples:
        # `mgetOrPut` returns the stored seq as a mutable location, so adding
        # through it updates the table in place. Binding the result to a
        # `var` first would copy the seq and force a write-back.
        prometheusMetrics.mgetOrPut(labelValues, newSeq[Metric]()).add(
          Metric(
            name: name,
            value: value.metricValue(),
            labels: locationLabelsKeys,
            labelValues: labelValues,
            timestamp: timestampMillis,
          )
        )

  proc collectOutlierMetrics(
    self: ProfilingCollector,
    profilerMetrics: seq[ProfilerMetric],
    prometheusMetrics: var Metrics,
    timestampMillis: int64,
  ): void =
    ## Appends two unlabeled summary metrics to the `@[]` entry of
    ## `prometheusMetrics`: the largest total exec time (time during which
    ## the async loop is stopped) and the largest single exec time seen
    ## across all of `profilerMetrics`. Both are stamped with
    ## `timestampMillis` and are meant to help spot outliers.
    ##
    ## The `@[]` entry is looked up with `[]`, so the caller has to create
    ## it beforehand.

    # NOTE(review): when `profilerMetrics` is empty both maxima keep this
    # seed and are exported as-is -- confirm that is the intended output.
    var
      peakTotal = low(timer.Duration)
      peakSingle = low(timer.Duration)

    for entry in profilerMetrics:
      if peakSingle < entry[1].maxSingleTime:
        peakSingle = entry[1].maxSingleTime
      if peakTotal < entry[1].totalExecTime:
        peakTotal = entry[1].totalExecTime

    let summaries = [
      Metric(
        name: "largest_total_exec_time",
        value: peakTotal.metricValue(),
        timestamp: timestampMillis,
      ),
      Metric(
        name: "largest_max_exec_time",
        value: peakSingle.metricValue(),
        timestamp: timestampMillis,
      ),
    ]

    for summary in summaries:
      prometheusMetrics[@[]].add(summary)

  method collect*(self: ProfilingCollector): Metrics =
    ## Takes a fresh sample from `perfSampler` and converts it into metrics:
    ## two unlabeled outlier summaries, followed by labeled timings for the
    ## first `self.k` procs when ordered by total exec time, largest first.
    let timestampMillis = times.getTime().toMilliseconds()

    var prometheusMetrics = Metrics()
    # The unlabeled entry has to exist before the outlier summaries are
    # appended to it.
    prometheusMetrics[@[]] = newSeq[Metric]()

    # Copy each sampled location out from behind its pointer so the entries
    # can be sorted as plain values.
    var currentMetrics: seq[ProfilerMetric]
    for location, overall in self.perfSampler().pairs:
      currentMetrics.add((location[], overall))

    currentMetrics.sort(
      proc (a, b: ProfilerMetric): int =
        cmp(a[1].totalExecTime, b[1].totalExecTime),
      order = SortOrder.Descending
    )

    # The helpers index the table with `[]`, which the compiler tracks as
    # possibly raising KeyError. The `@[]` entry is inserted above, so the
    # effect is overridden here.
    {.cast(raises:[]).}:
      self.collectOutlierMetrics(
        currentMetrics, prometheusMetrics, timestampMillis)
      self.collectSlowestProcs(
        currentMetrics, prometheusMetrics, timestampMillis, self.k)

    prometheusMetrics
add labeled top-k slowest async procs to prometheus collector 2023-11-06 15:10:13 -03:00			`import std/algorithm`
			`import std/enumerate`
			`import std/sequtils`
add basic prometheus profiling metrics tracker 2023-11-03 19:04:37 -03:00			`import std/times`

			`import asyncprofiler`
			`import metrics`

			`when defined(metrics):`
			`type`
			`ProfilingCollector* = ref object of Gauge`
			`perfSampler: PerfSampler`
add labeled top-k slowest async procs to prometheus collector 2023-11-06 15:10:13 -03:00			`k: int`
add basic prometheus profiling metrics tracker 2023-11-03 19:04:37 -03:00
			`PerfSampler = proc (): MetricsSummary {.raises: [].}`

add labeled top-k slowest async procs to prometheus collector 2023-11-06 15:10:13 -03:00			`ProfilerMetric = (SrcLoc, OverallMetrics)`

add basic prometheus profiling metrics tracker 2023-11-03 19:04:37 -03:00			`proc newCollector*(`
			`ProfilingCollector: typedesc,`
			`name: string,`
			`help: string,`
			`perfSampler: PerfSampler,`
add labeled top-k slowest async procs to prometheus collector 2023-11-06 15:10:13 -03:00			`k: int = 10,`
add basic prometheus profiling metrics tracker 2023-11-03 19:04:37 -03:00			`registry: Registry = defaultRegistry,`
			`): ProfilingCollector =`
			`result = ProfilingCollector.newCollector(`
add labeled top-k slowest async procs to prometheus collector 2023-11-06 15:10:13 -03:00			`name = name, help = help, registry = registry)`
add basic prometheus profiling metrics tracker 2023-11-03 19:04:37 -03:00			`result.perfSampler = perfSampler`
add labeled top-k slowest async procs to prometheus collector 2023-11-06 15:10:13 -03:00			`result.k = k`
add basic prometheus profiling metrics tracker 2023-11-03 19:04:37 -03:00
add labeled top-k slowest async procs to prometheus collector 2023-11-06 15:10:13 -03:00			`proc metricValue(duration: timer.Duration):`
			`float64 = duration.nanoseconds.float64`
add basic prometheus profiling metrics tracker 2023-11-03 19:04:37 -03:00
add labeled top-k slowest async procs to prometheus collector 2023-11-06 15:10:13 -03:00			`proc collectSlowestProcs(`
			`self: ProfilingCollector,`
			`profilerMetrics: seq[ProfilerMetric],`
			`prometheusMetrics: var Metrics,`
			`timestampMillis: int64,`
			`k: int,`
			`): void =`

			`const locationLabelsKeys = @["proc", "file", "line"]`

			`for (i, pair) in enumerate(profilerMetrics):`
			`if i == k:`
			`break`

			`let (location, metrics) = pair`

			`proc addLabeledMetric(name: string,`
			`value: timer.Duration,`
			`prometheusMetrics: var Metrics): void =`
			`let labelValues = @[`
			`$(location.procedure),`
			`$(location.file),`
			`$(location.line),`
			`]`

			`var procMetrics = prometheusMetrics.mGetOrPut(labelValues, @[])`

			`procMetrics.add(`
			`Metric(`
			`name: name,`
			`value: value.metricValue(),`
			`labels: locationLabelsKeys,`
			`labelValues: labelValues,`
			`)`
			`)`

			`# If you don't reassign, your modifications are simply lost due to nim's`
			`# weird var semantics.`
			`prometheusMetrics[labelValues] = procMetrics`

			`addLabeledMetric(`
			`"total_exec_time", metrics.totalExecTime, prometheusMetrics)`
			`addLabeledMetric(`
			`"total_run_time", metrics.totalRunTime, prometheusMetrics)`
			`addLabeledMetric(`
			`"total_wall_time", metrics.totalWallTime, prometheusMetrics)`
			`addLabeledMetric(`
			`"max_single_exec_time", metrics.maxSingleTime, prometheusMetrics)`

			`proc collectOutlierMetrics(`
			`self: ProfilingCollector,`
			`profilerMetrics: seq[ProfilerMetric],`
			`prometheusMetrics: var Metrics,`
			`timestampMillis: int64,`
			`): void =`
			`## Adds summary metrics for the procs that have the highest exec time`
			`## (which stops the async loop) and the highest max exec time. This can`
			`## help spot outliers.`
add basic prometheus profiling metrics tracker 2023-11-03 19:04:37 -03:00
			`var largestExecTime = low(timer.Duration)`
			`var largestMaxExecTime = low(timer.Duration)`
add labeled top-k slowest async procs to prometheus collector 2023-11-06 15:10:13 -03:00
			`for (_, metric) in profilerMetrics:`
add basic prometheus profiling metrics tracker 2023-11-03 19:04:37 -03:00			`if metric.maxSingleTime > largestMaxExecTime:`
			`largestMaxExecTime = metric.maxSingleTime`
			`if metric.totalExecTime > largestExecTime:`
			`largestExecTime = metric.totalExecTime`

add labeled top-k slowest async procs to prometheus collector 2023-11-06 15:10:13 -03:00			`prometheusMetrics[@[]].add(Metric(`
			`name: "largest_total_exec_time",`
			`value: largestExecTime.metricValue(),`
			`timestamp: timestampMillis,`
			`))`

			`prometheusMetrics[@[]].add(Metric(`
			`name: "largest_max_exec_time",`
			`value: largestMaxExecTime.metricValue(),`
			`timestamp: timestampMillis,`
			`))`

			`method collect*(self: ProfilingCollector): Metrics =`
			`let now = times.getTime().toMilliseconds()`

			`var prometheusMetrics = Metrics()`
			`prometheusMetrics[@[]] = newSeq[Metric]()`

			`# Samples the underlying metrics and orders pairs by total execution time.`
			`var currentMetrics = self.`
			`perfSampler().`
			`pairs.`
			`toSeq.`
			`map(`
			`proc (pair: (ptr SrcLoc, OverallMetrics)): ProfilerMetric =`
			`(pair[0][], pair[1])`
			`).`
			`sorted(`
			`proc (a, b: ProfilerMetric): int =`
			`cmp(a[1].totalExecTime, b[1].totalExecTime),`
			`order = SortOrder.Descending`
add basic prometheus profiling metrics tracker 2023-11-03 19:04:37 -03:00			`)`

add labeled top-k slowest async procs to prometheus collector 2023-11-06 15:10:13 -03:00			`# otherwise the compiler keeps complaining of a phantom KeyError`
			`{.cast(raises:[]).}:`
			`self.collectOutlierMetrics(currentMetrics, prometheusMetrics, now)`
			`self.collectSlowestProcs(currentMetrics, prometheusMetrics, now, self.k)`

			`prometheusMetrics`