nwaku/vendor/nim-metrics/metrics.nim

1167 lines
45 KiB
Nim

# Copyright (c) 2019-2022 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license: http://opensource.org/licenses/MIT
# * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# Exceptions coming out of this library are mostly handled but, due to bugs
# in Nim exception tracking and deficiencies in the standard library, not quite
# all exceptions can be tracked.
#
# When we do manage to catch an unexpected exception, we'll do one of several
# things:
# * Try to print the message to stderr
# * Raise a tracked exception - we use this strategy during collector
# registration
{.push raises: [Defect].} # Disabled further down for some parts of the code
import locks, net, os, sets, tables, times
when defined(metrics):
import algorithm, hashes, random, sequtils, strutils,
metrics/common
export tables # for custom collectors that need to work with the "Metrics" type
when defined(posix):
import posix
type
Labels* = seq[string]
LabelsParam* = openArray[string]
Metric* = ref object of RootObj
name*: string
value*: float64
timestamp*: int64 # UTC, in ms
labels*: Labels
labelValues*: Labels
Metrics* = OrderedTable[Labels, seq[Metric]]
Collector* = ref object of RootObj
lock*: Lock
name*: string
help*: string
typ*: string
labels*: Labels
metrics*: Metrics
creationThreadId*: int
sampleRate*: float # only used by StatsD counters
IgnoredCollector* = object
Counter* = ref object of Collector
Gauge* = ref object of Collector
Summary* = ref object of Collector
Histogram* = ref object of Collector # a cumulative histogram, not a regular one
buckets*: seq[float64]
Registry* = ref object of RootObj
lock*: Lock
collectors*: OrderedSet[Collector]
RegistrationError* = object of CatchableError
const CONTENT_TYPE* = "text/plain; version=0.0.4; charset=utf-8"
#########
# utils #
#########
when defined(metrics):
proc toMilliseconds*(time: times.Time): int64 =
return convert(Seconds, Milliseconds, time.toUnix()) + convert(Nanoseconds, Milliseconds, time.nanosecond())
template processHelp*(help: string): string =
help.multireplace([("\\", "\\\\"), ("\n", "\\n")])
template processLabelValue*(labelValue: string): string =
labelValue.multireplace([("\\", "\\\\"), ("\n", "\\n"), ("\"", "\\\"")])
proc toText*(metric: Metric, showTimestamp = true): string =
result = metric.name
if metric.labels.len > 0:
result.add('{')
var textLabels: seq[string] = @[]
for i in 0..metric.labels.high:
try:
textLabels.add("$#=\"$#\"" % [metric.labels[i], metric.labelValues[i].processLabelValue()])
except ValueError as e:
printError(e.msg)
result.add(textLabels.join(","))
result.add('}')
result.add(" " & $metric.value)
if showTimestamp and metric.timestamp > 0:
result.add(" " & $metric.timestamp)
proc `$`*(metric: Metric): string =
metric.toText()
const
nameRegexStr = r"^[a-zA-Z_:][a-zA-Z0-9_:]*$"
labelRegexStr = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
when not defined(withoutPCRE):
import re
let
nameRegex {.global.} = re(nameRegexStr)
labelRegex {.global.} = re(labelRegexStr)
template validName(name): bool =
name.contains(nameRegex)
template validLabel(label): bool =
label.contains(labelRegex)
else:
const
labelStartChars = {'a'..'z', 'A'..'Z', '_'}
labelChars = labelStartChars + {'0'..'9'}
nameStartChars = labelStartChars + {':'}
nameChars = labelChars + {':'}
template validate(ident, startChars, chars): bool =
ident.len > 0 and ident[0] in startChars and @ident.allIt(it in chars)
template validName(name): bool =
validate(name, nameStartChars, nameChars)
template validLabel(label): bool =
validate(label, labelStartChars, labelChars)
proc validateName*(name: string) {.raises: [Defect, ValueError].} =
if not validName(name):
raise newException(ValueError, "Invalid name: '" & name & "'. It should match the regex: " & nameRegexStr)
proc validateLabels(labels: LabelsParam, invalidLabelNames: openArray[string] = []) {.raises: [Defect, ValueError].} =
for label in labels:
if not validLabel(label):
raise newException(ValueError, "Invalid label: '" & label & "'. It should match the regex: '" & labelRegexStr & "'.")
if label.startsWith("__"):
raise newException(ValueError, "Invalid label: '" & label & "'. It should not start with '__'.")
if label in invalidLabelNames:
raise newException(ValueError, "Invalid label: '" & label & "'. It should not be one of: " & $invalidLabelNames & ".")
######################
# generic collectors #
######################
when defined(metrics):
template getEmptyLabelValues*(collector: Collector): Labels =
sequtils.repeat("", len(collector.labels))
proc validateLabelValues*(collector: Collector, labelValues: LabelsParam): Labels {.raises: [Defect, ValueError].} =
if labelValues.len == 0:
result = collector.getEmptyLabelValues()
elif labelValues.len != collector.labels.len:
raise newException(ValueError, "The number of label values doesn't match the number of labels.")
else:
result = @labelValues
# avoid having to change another thread's heap
if result notin collector.metrics and collector.creationThreadId != getThreadId():
raise newException(AccessViolationError, "Adding a new combination of label values from another thread than the one in which the collector was created is not allowed.")
method hash*(collector: Collector): Hash {.base.} =
result = result !& collector.name.hash
for label in collector.labels:
result = result !& label.hash
result = !$result
# Hash-based data types like OrderedSet don't just compare hashes to determine
# if a key is present, but also compare the keys themselves, so we need to
# override the `==` operator.
method `==`*(x, y: Collector): bool {.base.} =
x.name == y.name and x.labels == y.labels
method collect*(collector: Collector): Metrics {.base.} =
return collector.metrics
proc toTextLines*(collector: Collector, metricsTable: Metrics, showTimestamp = true): seq[string] =
try:
result = @[
"# HELP $# $#" % [collector.name, collector.help.processHelp()],
"# TYPE $# $#" % [collector.name, collector.typ],
]
for labelValues, metrics in metricsTable:
for metric in metrics:
result.add(metric.toText(showTimestamp))
except ValueError as e:
printError(e.msg)
result = @[""]
proc toText*(collector: Collector, showTimestamp = true): string =
collector.toTextLines(collector.metrics, showTimestamp).join("\n")
proc `$`*(collector: Collector): string =
collector.toText()
# Used for custom collectors, to shield the API user from having to deal with
# internal details like lock initialisation.
proc buildCollector* [T] (typ: typedesc[T], name: string, help: string, labels: LabelsParam = @[]): T {.raises: [Defect, ValueError].} =
validateName(name)
validateLabels(labels)
result = T(name: name,
help: help,
typ: "gauge", # Prometheus does not support a non-standard value here
labels: @labels,
creationThreadId: getThreadId())
result.lock.initLock()
proc `$`*(collector: type IgnoredCollector): string = ""
# for testing
template value*(collector: Collector | type IgnoredCollector, labelValues: LabelsParam = @[]): float64 =
var res: float64
when defined(metrics) and collector is not IgnoredCollector:
# Don't access the "metrics" field directly, so we can support custom
# collectors.
withLock collector.lock:
res = collector.collect()[@labelValues][0].value
else:
res = 0.0
res
# for testing
proc valueByName*(collector: Collector | type IgnoredCollector,
metricName: string,
labelValues: LabelsParam = @[],
extraLabelValues: LabelsParam = @[]): float64 {.raises: [Defect, ValueError].} =
when defined(metrics) and collector is not IgnoredCollector:
let allLabelValues = @labelValues & @extraLabelValues
withLock collector.lock:
for metric in collector.collect()[@labelValues]:
if metric.name == metricName and metric.labelValues == allLabelValues:
return metric.value
raise newException(KeyError, "No such metric name for this collector: '" & metricName & "' (label values = " & $allLabelValues & ").")
############
# registry #
############
proc newRegistry*(): Registry =
when defined(metrics):
new(result)
result.lock.initLock()
# needs to be {.global.} because of the alternative API's usage of {.global.} collector vars
var defaultRegistry* {.global.} = newRegistry()
# We use a generic type here in order to avoid the hidden type casting of
# Collector child types to the parent type.
proc register* [T] (collector: T, registry = defaultRegistry) {.raises: [Defect, RegistrationError].} =
when defined(metrics):
withLock registry.lock:
if collector in registry.collectors:
raise newException(RegistrationError, "Collector already registered: " & collector.name)
registry.collectors.incl(collector)
proc unregister* [T] (collector: T, registry = defaultRegistry) {.raises: [Defect, RegistrationError].} =
when defined(metrics) and collector is not IgnoredCollector:
withLock registry.lock:
if collector notin registry.collectors:
raise newException(RegistrationError, "Collector not registered.")
registry.collectors.excl(collector)
proc unregister* (collector: type IgnoredCollector, registry = defaultRegistry) = discard
proc collect*(registry: Registry): OrderedTable[Collector, Metrics] =
when defined(metrics):
withLock registry.lock:
for collector in registry.collectors:
var collectorCopy: Collector
withLock collector.lock:
deepCopy(collectorCopy, collector)
collectorCopy.lock.initLock()
result[collectorCopy] = collectorCopy.collect()
proc toText*(registry: Registry, showTimestamp = true): string =
when defined(metrics):
var res: seq[string] = @[]
for collector, metricsTable in registry.collect():
res.add(collector.toTextLines(metricsTable, showTimestamp))
res.add("")
return res.join("\n")
proc `$`*(registry: Registry): string =
registry.toText()
#####################
# custom collectors #
#####################
when defined(metrics):
# Used for custom collectors, to shield the API user from having to deal with
# internal details like lock initialisation.
# Also used internally, for creating standard collectors, to avoid code
# duplication.
proc newCollector* [T] (typ: typedesc[T], name: string, help: string, labels: LabelsParam = @[],
registry = defaultRegistry, standardType = "gauge"): T
{.raises: [Defect, ValueError, RegistrationError].} =
validateName(name)
validateLabels(labels)
result = T(name: name,
help: help,
typ: standardType, # Prometheus does not support a non-standard value here
labels: @labels,
creationThreadId: getThreadId())
result.lock.initLock()
result.register(registry)
#######################################
# export metrics to StatsD and Carbon #
#######################################
when defined(metrics):
type
MetricProtocol* = enum
STATSD
CARBON
NetProtocol* = enum
TCP
UDP
ExportBackend* = object
metricProtocol*: MetricProtocol
netProtocol*: NetProtocol
address*: string
port*: Port
ExportedMetric = object
name: string
value: float64
increment: float64
metricType: string
timestamp: int64
sampleRate: float # only used by StatsD
const
METRIC_EXPORT_BUFER_SIZE = 1024 # used by exportChan
CONNECT_TIMEOUT_MS = 100 # in milliseconds
RECONNECT_INTERVAL = initDuration(seconds = 10)
var
exportBackends*: seq[ExportBackend] = @[]
exportBackendsLock*: Lock
exportChan: Channel[ExportedMetric]
exportThread: Thread[void]
sockets: seq[Socket] = @[] # we maintain one socket per backend
lastConnectionTime: seq[times.Time] = @[] # last time we tried to connect the corresponding socket
initLock(exportBackendsLock)
exportChan.open(maxItems = METRIC_EXPORT_BUFER_SIZE)
proc addExportBackend*(metricProtocol: MetricProtocol, netProtocol: NetProtocol, address: string, port: Port) =
withLock(exportBackendsLock):
exportBackends.add(ExportBackend(
metricProtocol: metricProtocol,
netProtocol: netProtocol,
address: address,
port: port
))
proc updateSystemMetrics*() {.gcsafe.} # defined later in this file
var systemMetricsAutomaticUpdate = true # whether to piggy-back on changes of user-defined metrics
proc getSystemMetricsAutomaticUpdate*(): bool =
return systemMetricsAutomaticUpdate
proc setSystemMetricsAutomaticUpdate*(value: bool) =
systemMetricsAutomaticUpdate = value
proc pushMetrics*(name: string, value: float64, increment = 0.float64, metricType: string,
timestamp: int64, sampleRate = 1.float, doUpdateSystemMetrics = true) {.raises: [Defect].} =
# this may run from different threads
if systemMetricsAutomaticUpdate and doUpdateSystemMetrics:
updateSystemMetrics()
if len(exportBackends) == 0:
# no backends configured
return
# Send a new metric to the thread handling the networking.
# Silently drop it if the channel's buffer is full.
try:
discard exportChan.trySend(ExportedMetric(
name: name,
value: value,
increment: increment,
metricType: metricType,
timestamp: timestamp,
sampleRate: sampleRate
))
except Exception as e:
printError(e.msg)
# connect or reconnect the socket at position i in `sockets`
proc reconnectSocket(i: int, backend: ExportBackend) {.raises: [Defect, OSError].} =
# Throttle it.
# We don't expect enough backends to worry about the thundering herd problem.
if getTime() - lastConnectionTime[i] < RECONNECT_INTERVAL:
sleep(100) # silly optimisation for an artificial benchmark where we try to
# export as many metric updates as possible with a missing backend
return
# try to close any existing socket, first
if sockets[i] != nil:
try:
sockets[i].close()
except:
discard
sockets[i] = nil # we use this as a flag to avoid sends without a connection
# create a new socket
case backend.netProtocol:
of UDP:
sockets[i] = newSocket(Domain.AF_INET, SockType.SOCK_DGRAM, Protocol.IPPROTO_UDP)
of TCP:
sockets[i] = newSocket()
# try to connect
lastConnectionTime[i] = getTime()
try:
sockets[i].connect(backend.address, backend.port, timeout = CONNECT_TIMEOUT_MS)
except:
try:
sockets[i].close()
except:
discard
sockets[i] = nil
proc pushMetricsWorker() {.thread.} =
ignoreSignalsInThread()
var
data: ExportedMetric # received from the channel
payload: string
finalValue: float64
sampleString: string
# seed the simple PRNG we're using for sample rates
randomize()
# No custom cleanup needed here, so let this thread be killed, the sockets
# closed, etc., by the OS.
try:
while true:
data = exportChan.recv() # blocking read
withLock(exportBackendsLock):
{.gcsafe.}:
# Account for backends added after this thread is launched. We don't
# support backend deletion.
if len(sockets) < len(exportBackends):
sockets.setLen(len(exportBackends))
if len(lastConnectionTime) < len(exportBackends):
lastConnectionTime.setLen(len(exportBackends))
# send the metrics
for i, backend in exportBackends:
case backend.metricProtocol:
of STATSD:
finalValue = data.value
sampleString = ""
if data.metricType == "c":
# StatsD wants only the counter's increment, while Carbon wants the cumulated value
finalValue = data.increment
# If the sample rate was set, throw the dice here.
if data.sampleRate > 0 and data.sampleRate < 1.float:
if rand(max = 1.float) > data.sampleRate:
# skip it
continue
sampleString = "|@" & $data.sampleRate
payload = "$#:$#|$#$#\n" % [data.name, $finalValue, data.metricType, sampleString]
of CARBON:
# Carbon wants a 32-bit timestamp in seconds.
payload = "$# $# $#\n" % [data.name, $data.value, $(data.timestamp div 1000).int32]
if sockets[i] == nil:
reconnectSocket(i, backend)
if sockets[i] == nil:
# we're in the waiting period
continue
try:
sockets[i].send(payload, flags = {}) # the default flags would not raise an exception on a broken connection
except OSError:
reconnectSocket(i, backend)
except Exception as e: # std lib raises lots of these
printError(e.msg)
exportThread.createThread(pushMetricsWorker)
###########
# counter #
###########
when defined(metrics):
proc newCounterMetrics(name: string, labels, labelValues: LabelsParam): seq[Metric] =
result = @[
Metric(name: name & "_total",
labels: @labels,
labelValues: @labelValues),
Metric(name: name & "_created",
labels: @labels,
labelValues: @labelValues,
value: getTime().toUnix().float64),
]
proc validateCounterLabelValues(counter: Counter, labelValues: LabelsParam): Labels {.raises: [Defect, ValueError].} =
result = validateLabelValues(counter, labelValues)
if result notin counter.metrics:
counter.metrics[result] = newCounterMetrics(counter.name, counter.labels, result)
# don't document this one, even if we're forced to make it public, because it
# won't work when all (or some) collectors are disabled
proc newCounter*(name: string, help: string, labels: LabelsParam = @[],
registry = defaultRegistry, sampleRate = 1.float): Counter
{.raises: [Defect, ValueError, RegistrationError].} =
result = Counter.newCollector(name, help, labels, registry, "counter")
result.sampleRate = sampleRate
if labels.len == 0:
result.metrics[@labels] = newCounterMetrics(name, labels, labels)
template declareCounter*(identifier: untyped,
help: static string,
labels: LabelsParam = @[],
registry = defaultRegistry,
sampleRate = 1.float,
name = "") {.dirty.} =
# fine-grained collector disabling will go in here, turning disabled
# collectors into type aliases for IgnoredCollector
when defined(metrics):
var identifier = newCounter(if name != "": name else: astToStr(identifier), help, labels, registry, sampleRate)
else:
type identifier = IgnoredCollector
template declarePublicCounter*(identifier: untyped,
help: static string,
labels: LabelsParam = @[],
registry = defaultRegistry,
sampleRate = 1.float,
name = "") {.dirty.} =
when defined(metrics):
var identifier* = newCounter(if name != "": name else: astToStr(identifier), help, labels, registry, sampleRate)
else:
type identifier* = IgnoredCollector
#- alternative API (without support for custom help strings, labels or custom registries)
#- different collector types with the same names are allowed
#- don't mark this proc as {.inline.} because it's incompatible with {.global.}: https://github.com/status-im/nim-metrics/pull/5#discussion_r304687474
when defined(metrics):
proc counter*(name: static string): Counter {.raises: [Defect, ValueError, RegistrationError].} =
# This {.global.} var assignment is lifted from the procedure and placed in a
# special module init section that's guaranteed to run only once per program.
# Calls to this proc will just return the globally initialised variable.
var res {.global.} = newCounter(name, "")
return res
else:
template counter*(name: static string): untyped =
IgnoredCollector
proc incCounter(counter: Counter, amount: int64|float64 = 1, labelValues: LabelsParam = @[]) =
when defined(metrics):
try:
var timestamp = getTime().toMilliseconds()
if amount < 0:
raise newException(ValueError, "Counter.inc() cannot be used with negative amounts.")
withLock counter.lock:
let labelValuesCopy = validateCounterLabelValues(counter, labelValues)
counter.metrics[labelValuesCopy][0].value += amount.float64
counter.metrics[labelValuesCopy][0].timestamp = timestamp
pushMetrics(name = counter.name,
value = counter.metrics[labelValuesCopy][0].value,
increment = amount.float64,
metricType = "c",
timestamp = timestamp,
sampleRate = counter.sampleRate)
except Exception as e:
printError(e.msg)
template inc*(counter: Counter | type IgnoredCollector, amount: int64|float64 = 1, labelValues: LabelsParam = @[]) =
when defined(metrics) and counter is not IgnoredCollector:
{.gcsafe.}: incCounter(counter, amount, labelValues)
template countExceptions*(counter: Counter | type IgnoredCollector, typ: typedesc, labelValues: LabelsParam, body: untyped) =
when defined(metrics) and counter is not IgnoredCollector:
try:
body
except typ as exc:
counter.inc(1, labelValues)
raise exc
else:
body
template countExceptions*(counter: Counter | type IgnoredCollector, typ: typedesc, body: untyped) =
when defined(metrics) and counter is not IgnoredCollector:
let labelValues: Labels = @[]
counter.countExceptions(typ, labelValues):
body
else:
body
template countExceptions*(counter: Counter | type IgnoredCollector, labelValues: LabelsParam, body: untyped) =
countExceptions(counter, Exception, labelValues, body)
template countExceptions*(counter: Counter | type IgnoredCollector, body: untyped) =
when defined(metrics) and counter is not IgnoredCollector:
let labelValues: Labels = @[]
counter.countExceptions(labelValues):
body
else:
body
#########
# gauge #
#########
when defined(metrics):
proc newGaugeMetrics(name: string, labels, labelValues: LabelsParam): seq[Metric] =
result = @[
Metric(name: name,
labels: @labels,
labelValues: @labelValues),
Metric(name: name & "_created",
labels: @labels,
labelValues: @labelValues,
value: getTime().toUnix().float64),
]
proc validateGaugeLabelValues(gauge: Gauge, labelValues: LabelsParam): Labels {.raises: [Defect, ValueError].} =
result = validateLabelValues(gauge, labelValues)
if result notin gauge.metrics:
gauge.metrics[result] = newGaugeMetrics(gauge.name, gauge.labels, result)
proc newGauge*(name: string, help: string, labels: LabelsParam = @[],
registry = defaultRegistry): Gauge
{.raises: [Defect, ValueError, RegistrationError].} =
result = Gauge.newCollector(name, help, labels, registry, "gauge")
if labels.len == 0:
result.metrics[@labels] = newGaugeMetrics(name, labels, labels)
template declareGauge*(identifier: untyped,
help: static string,
labels: LabelsParam = @[],
registry = defaultRegistry,
name = "") {.dirty.} =
when defined(metrics):
var identifier = newGauge(if name != "": name else: astToStr(identifier), help, labels, registry)
else:
type identifier = IgnoredCollector
# alternative API
when defined(metrics):
proc gauge*(name: static string): Gauge {.raises: [Defect, ValueError, RegistrationError].} =
var res {.global.} = newGauge(name, "") # lifted line
return res
else:
template gauge*(name: static string): untyped =
IgnoredCollector
template declarePublicGauge*(identifier: untyped,
help: static string,
labels: LabelsParam = @[],
registry = defaultRegistry,
name = "") {.dirty.} =
when defined(metrics):
var identifier* = newGauge(if name != "": name else: astToStr(identifier), help, labels, registry)
else:
type identifier* = IgnoredCollector
proc incGauge(gauge: Gauge, amount: int64|float64 = 1, labelValues: LabelsParam = @[]) =
when defined(metrics):
try:
var timestamp = getTime().toMilliseconds()
withLock gauge.lock:
let labelValuesCopy = validateGaugeLabelValues(gauge, labelValues)
gauge.metrics[labelValuesCopy][0].value += amount.float64
gauge.metrics[labelValuesCopy][0].timestamp = timestamp
pushMetrics(name = gauge.name,
value = gauge.metrics[labelValuesCopy][0].value,
metricType = "g",
timestamp = timestamp)
except Exception as e:
printError(e.msg)
proc decGauge(gauge: Gauge, amount: int64|float64 = 1, labelValues: LabelsParam = @[]) =
when defined(metrics):
gauge.inc((-amount).float64, labelValues)
proc setGauge(gauge: Gauge, value: int64|float64, labelValues: LabelsParam = @[], doUpdateSystemMetrics: bool) =
when defined(metrics):
try:
var timestamp = getTime().toMilliseconds()
withLock gauge.lock:
let labelValuesCopy = validateGaugeLabelValues(gauge, labelValues)
gauge.metrics[labelValuesCopy][0].value = value.float64
gauge.metrics[labelValuesCopy][0].timestamp = timestamp
pushMetrics(name = gauge.name,
value = value.float64,
metricType = "g",
timestamp = timestamp,
doUpdateSystemMetrics = doUpdateSystemMetrics)
except Exception as e:
printError(e.msg)
# the "type IgnoredCollector" case is covered by Counter.inc()
template inc*(gauge: Gauge, amount: int64|float64 = 1, labelValues: LabelsParam = @[]) =
when defined(metrics):
{.gcsafe.}: incGauge(gauge, amount, labelValues)
template dec*(gauge: Gauge | type IgnoredCollector, amount: int64|float64 = 1, labelValues: LabelsParam = @[]) =
when defined(metrics) and gauge is not IgnoredCollector:
{.gcsafe.}: decGauge(gauge, amount, labelValues)
template set*(gauge: Gauge | type IgnoredCollector, value: int64|float64, labelValues: LabelsParam = @[], doUpdateSystemMetrics = true) =
when defined(metrics) and gauge is not IgnoredCollector:
{.gcsafe.}: setGauge(gauge, value, labelValues, doUpdateSystemMetrics)
# in seconds
proc setToCurrentTime*(gauge: Gauge | type IgnoredCollector, labelValues: LabelsParam = @[]) =
when defined(metrics) and gauge is not IgnoredCollector:
gauge.set(getTime().toUnix(), labelValues)
template trackInProgress*(gauge: Gauge | type IgnoredCollector, labelValues: LabelsParam, body: untyped) =
when defined(metrics) and gauge is not IgnoredCollector:
gauge.inc(1, labelValues)
body
gauge.dec(1, labelValues)
else:
body
template trackInProgress*(gauge: Gauge | type IgnoredCollector, body: untyped) =
when defined(metrics) and gauge is not IgnoredCollector:
let labelValues: Labels = @[]
gauge.trackInProgress(labelValues):
body
else:
body
# in seconds
template time*(gauge: Gauge | type IgnoredCollector, labelValues: LabelsParam, body: untyped) =
when defined(metrics) and gauge is not IgnoredCollector:
let start = times.toUnix(getTime())
body
gauge.set(times.toUnix(getTime()) - start, labelValues)
else:
body
template time*(collector: Gauge | Summary | Histogram | type IgnoredCollector, body: untyped) =
when defined(metrics) and collector is not IgnoredCollector:
let labelValues: Labels = @[]
collector.time(labelValues):
body
else:
body
###########
# summary #
###########
when defined(metrics):
proc newSummaryMetrics(name: string, labels, labelValues: LabelsParam): seq[Metric] =
result = @[
Metric(name: name & "_sum",
labels: @labels,
labelValues: @labelValues),
Metric(name: name & "_count",
labels: @labels,
labelValues: @labelValues),
Metric(name: name & "_created",
labels: @labels,
labelValues: @labelValues,
value: getTime().toUnix().float64),
]
proc validateSummaryLabelValues(summary: Summary, labelValues: LabelsParam): Labels {.raises: [Defect, ValueError].} =
result = validateLabelValues(summary, labelValues)
if result notin summary.metrics:
summary.metrics[result] = newSummaryMetrics(summary.name, summary.labels, result)
proc newSummary*(name: string, help: string, labels: LabelsParam = @[],
registry = defaultRegistry): Summary
{.raises: [Defect, ValueError, RegistrationError].} =
validateLabels(labels, invalidLabelNames = ["quantile"])
result = Summary.newCollector(name, help, labels, registry, "summary")
if labels.len == 0:
result.metrics[@labels] = newSummaryMetrics(name, labels, labels)
template declareSummary*(identifier: untyped,
help: static string,
labels: LabelsParam = @[],
registry = defaultRegistry,
name = "") {.dirty.} =
when defined(metrics):
var identifier = newSummary(if name != "": name else: astToStr(identifier), help, labels, registry)
else:
type identifier = IgnoredCollector
template declarePublicSummary*(identifier: untyped,
help: static string,
labels: LabelsParam = @[],
registry = defaultRegistry,
name = "") {.dirty.} =
when defined(metrics):
var identifier* = newSummary(if name != "": name else: astToStr(identifier), help, labels, registry)
else:
type identifier* = IgnoredCollector
when defined(metrics):
proc summary*(name: static string): Summary {.raises: [Defect, ValueError, RegistrationError].} =
var res {.global.} = newSummary(name, "") # lifted line
return res
else:
template summary*(name: static string): untyped =
IgnoredCollector
proc observeSummary(summary: Summary, amount: int64|float64, labelValues: LabelsParam = @[]) =
when defined(metrics):
try:
var timestamp = getTime().toMilliseconds()
withLock summary.lock:
let labelValuesCopy = validateSummaryLabelValues(summary, labelValues)
summary.metrics[labelValuesCopy][0].value += amount.float64 # _sum
summary.metrics[labelValuesCopy][0].timestamp = timestamp
summary.metrics[labelValuesCopy][1].value += 1.float64 # _count
summary.metrics[labelValuesCopy][1].timestamp = timestamp
except Exception as e:
printError(e.msg)
template observe*(summary: Summary | type IgnoredCollector, amount: int64|float64 = 1, labelValues: LabelsParam = @[]) =
when defined(metrics) and summary is not IgnoredCollector:
{.gcsafe.}: observeSummary(summary, amount, labelValues)
# in seconds
# the "type IgnoredCollector" case and the version without labels are covered by Gauge.time()
template time*(collector: Summary | Histogram, labelValues: LabelsParam, body: untyped) =
when defined(metrics):
let start = times.toUnix(getTime())
body
collector.observe(times.toUnix(getTime()) - start, labelValues)
else:
body
#############
# histogram #
#############
let defaultHistogramBuckets* {.global.} = [0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, Inf]
when defined(metrics):
proc newHistogramMetrics(name: string, labels, labelValues: LabelsParam, buckets: seq[float64]): seq[Metric] =
result = @[
Metric(name: name & "_sum",
labels: @labels,
labelValues: @labelValues),
Metric(name: name & "_count",
labels: @labels,
labelValues: @labelValues),
Metric(name: name & "_created",
labels: @labels,
labelValues: @labelValues,
value: getTime().toUnix().float64),
]
var bucketLabels = @labels & "le"
for bucket in buckets:
var bucketStr = $bucket
if bucket == Inf:
bucketStr = "+Inf"
result.add(
Metric(name: name & "_bucket",
labels: bucketLabels,
labelValues: @labelValues & bucketStr)
)
proc validateHistogramLabelValues(histogram: Histogram, labelValues: LabelsParam): Labels {.raises: [Defect, ValueError].} =
result = validateLabelValues(histogram, labelValues)
if result notin histogram.metrics:
histogram.metrics[result] = newHistogramMetrics(histogram.name, histogram.labels, result, histogram.buckets)
proc newHistogram*(name: string, help: string, labels: LabelsParam = @[],
registry = defaultRegistry, buckets: openArray[float64] = defaultHistogramBuckets): Histogram
{.raises: [Defect, ValueError, RegistrationError].} =
validateLabels(labels, invalidLabelNames = ["le"])
var bucketsSeq = @buckets
if bucketsSeq.len > 0 and bucketsSeq[^1] != Inf:
bucketsSeq.add(Inf)
if bucketsSeq.len < 2:
raise newException(ValueError, "Invalid buckets list: '" & $bucketsSeq & "'. At least 2 required.")
if not bucketsSeq.isSorted(system.cmp[float64]):
raise newException(ValueError, "Invalid buckets list: '" & $bucketsSeq & "'. Must be sorted.")
result = Histogram.newCollector(name, help, labels, registry, "histogram")
result.buckets = bucketsSeq
if labels.len == 0:
result.metrics[@labels] = newHistogramMetrics(name, labels, labels, bucketsSeq)
template declareHistogram*(identifier: untyped,
help: static string,
labels: LabelsParam = @[],
registry = defaultRegistry,
buckets: openArray[float64] = defaultHistogramBuckets,
name = "") {.dirty.} =
when defined(metrics):
var identifier = newHistogram(if name != "": name else: astToStr(identifier), help, labels, registry, buckets)
else:
type identifier = IgnoredCollector
template declarePublicHistogram*(identifier: untyped,
help: static string,
labels: LabelsParam = @[],
registry = defaultRegistry,
buckets: openArray[float64] = defaultHistogramBuckets,
name = "") {.dirty.} =
when defined(metrics):
var identifier* = newHistogram(if name != "": name else: astToStr(identifier), help, labels, registry, buckets)
else:
type identifier* = IgnoredCollector
when defined(metrics):
proc histogram*(name: static string): Histogram {.raises: [Defect, ValueError, RegistrationError].} =
var res {.global.} = newHistogram(name, "") # lifted line
return res
else:
template histogram*(name: static string): untyped =
IgnoredCollector
proc observeHistogram(histogram: Histogram, amount: int64|float64, labelValues: LabelsParam = @[]) =
when defined(metrics):
try:
var timestamp = getTime().toMilliseconds()
withLock histogram.lock:
let labelValuesCopy = validateHistogramLabelValues(histogram, labelValues)
histogram.metrics[labelValuesCopy][0].value += amount.float64 # _sum
histogram.metrics[labelValuesCopy][0].timestamp = timestamp
histogram.metrics[labelValuesCopy][1].value += 1.float64 # _count
histogram.metrics[labelValuesCopy][1].timestamp = timestamp
for i, bucket in histogram.buckets:
if amount.float64 <= bucket:
#- "le" probably stands for "less or equal"
#- the same observed value can increase multiple buckets, because this is
# a cumulative histogram
histogram.metrics[labelValuesCopy][i + 3].value += 1.float64 # _bucket{le="<bucket value>"}
histogram.metrics[labelValuesCopy][i + 3].timestamp = timestamp
except Exception as e:
printError(e.msg)
# the "type IgnoredCollector" case is covered by Summary.observe()
template observe*(histogram: Histogram, amount: int64|float64 = 1, labelValues: LabelsParam = @[]) =
when defined(metrics):
{.gcsafe.}: observeHistogram(histogram, amount, labelValues)
#########################
# update system metrics #
#########################
when defined(metrics):
const metrics_max_hooks = 16
type
SystemMetricsUpdateProc = proc() {.gcsafe, nimcall.}
ThreadMetricsUpdateProc = proc() {.gcsafe, nimcall.}
let mainThreadID = getThreadId()
var
threadMetricsUpdateProcs: array[metrics_max_hooks, ThreadMetricsUpdateProc]
threadMetricsUpdateProcsIndex = 0
systemMetricsUpdateInterval = initDuration(seconds = 10)
systemMetricsLastUpdated = now()
proc getSystemMetricsUpdateInterval*(): Duration =
return systemMetricsUpdateInterval
proc setSystemMetricsUpdateInterval*(value: Duration) =
systemMetricsUpdateInterval = value
proc updateThreadMetrics*() {.gcsafe.} =
for i in 0 ..< threadMetricsUpdateProcsIndex:
try:
threadMetricsUpdateProcs[i]()
except CatchableError as e:
printError(e.msg)
except Exception as e:
raise newException(Defect, e.msg)
# No longer used for all system metrics, which now are custom collectors, but
# still used for main-thread metrics.
proc updateSystemMetrics*() {.gcsafe.} =
if systemMetricsAutomaticUpdate:
# Update system metrics if at least systemMetricsUpdateInterval seconds
# have passed and if we are being called from the main thread.
if getThreadId() == mainThreadID:
let currTime = now()
if currTime >= (systemMetricsLastUpdated + systemMetricsUpdateInterval):
systemMetricsLastUpdated = currTime
# Update thread metrics, only when automation is on and we're in the
# main thread.
updateThreadMetrics()
################
# process info #
################
when defined(metrics) and defined(linux):
import posix
var
btime {.global.}: float64 = 0
ticks {.global.}: float64 # clock ticks per second
pagesize {.global.}: float64 # page size in bytes
if btime == 0:
try:
for line in lines("/proc/stat"):
if line.startsWith("btime"):
btime = line.split(' ')[1].parseFloat()
except IOError:
# /proc not mounted?
discard
ticks = sysconf(SC_CLK_TCK).float64
pagesize = sysconf(SC_PAGE_SIZE).float64
type ProcessInfo = ref object of Gauge
var processInfo* {.global.} = ProcessInfo.newCollector("process_info", "CPU and memory usage")
method collect*(collector: ProcessInfo): Metrics =
let timestamp = getTime().toMilliseconds()
result[@[]] = @[]
try:
if btime == 0:
# we couldn't access /proc
return
# the content of /proc/self/stat looks like this (the command name may contain spaces):
#
# $ cat /proc/self/stat
# 30494 (cat) R 3022 30494 3022 34830 30494 4210688 98 0 0 0 0 0 0 0 20 0 1 0 73800491 10379264 189 18446744073709551615 94060049248256 94060049282149 140735229395104 0 0 0 0 0 0 0 0 0 17 6 0 0 0 0 0 94060049300560 94060049302112 94060076990464 140735229397011 140735229397031 140735229397031 140735229403119 0
let selfStat = readFile("/proc/self/stat").split(") ")[^1].split(' ')
result[@[]] = @[
Metric(
name: "process_virtual_memory_bytes", # Virtual memory size in bytes.
value: selfStat[20].parseFloat(),
timestamp: timestamp,
),
Metric(
name: "process_resident_memory_bytes", # Resident memory size in bytes.
value: selfStat[21].parseFloat() * pagesize,
timestamp: timestamp,
),
Metric(
name: "process_start_time_seconds", # Start time of the process since unix epoch in seconds.
value: selfStat[19].parseFloat() / ticks + btime,
timestamp: timestamp,
),
Metric(
name: "process_cpu_seconds_total", # Total user and system CPU time spent in seconds.
value: (selfStat[11].parseFloat() + selfStat[12].parseFloat()) / ticks,
timestamp: timestamp,
),
]
for line in lines("/proc/self/limits"):
if line.startsWith("Max open files"):
result[@[]].add(
Metric(
name: "process_max_fds", # Maximum number of open file descriptors.
value: line.splitWhiteSpace()[3].parseFloat(), # a simple `split()` does not combine adjacent whitespace
timestamp: timestamp,
)
)
break
result[@[]].add(
Metric(
name: "process_open_fds", # Number of open file descriptors.
value: toSeq(walkDir("/proc/self/fd")).len.float64,
timestamp: timestamp,
)
)
except CatchableError as e:
printError(e.msg)
####################
# Nim runtime info #
####################
when defined(metrics):
type NimRuntimeInfo = ref object of Gauge
var nimRuntimeInfo* {.global.} = NimRuntimeInfo.newCollector("nim_runtime_info", "Nim runtime info")
method collect*(collector: NimRuntimeInfo): Metrics =
let timestamp = getTime().toMilliseconds()
result[@[]] = @[]
try:
when defined(nimTypeNames) and declared(dumpHeapInstances):
# Too high cardinality causes performance issues in Prometheus.
const labelsLimit = 10
var
# Higher size than in the loop for adding metrics
# to avoid missing same name metrics far apart with low values.
heapSizes: array[100, (cstring, int)]
counter: int
heapSum: int # total size of all instances
for data in dumpHeapInstances():
counter += 1
heapSum += data.sizes
var smallest = 0
var dedupe = false
for i in 0..<heapSizes.len:
if heapSizes[i][0] == data.name:
heapSizes[i][1] += data.sizes
dedupe = true
break
if heapSizes[smallest][1] >= heapSizes[i][1]:
smallest = i
if not dedupe and data.sizes > heapSizes[smallest][1]:
heapSizes[smallest] = (data.name, data.sizes)
sort(heapSizes, proc(a, b: auto): auto = b[1] - a[1])
# Lower the number of metrics to reduce metric cardinality.
for i in 0..<labelsLimit:
let (typeName, size) = heapSizes[i]
result[@[]].add(
Metric(
name: "nim_gc_heap_instance_occupied_bytes", # total bytes occupied, by instance type (all threads)
value: size.float64,
timestamp: timestamp,
labels: @["type_name"],
labelValues: @[$typeName],
)
)
result[@[]].add(
Metric(
name: "nim_gc_heap_instance_occupied_summed_bytes", # total bytes occupied by all instance types, in all threads - should be equal to 'sum(nim_gc_mem_occupied_bytes)' when 'updateThreadMetrics()' is being called in all threads, but it's somewhat smaller
value: heapSum.float64,
timestamp: timestamp,
)
)
except CatchableError as e:
printError(e.msg)
declareGauge nim_gc_mem_bytes, "the number of bytes that are owned by a thread's GC", ["thread_id"]
declareGauge nim_gc_mem_occupied_bytes, "the number of bytes that are owned by a thread's GC and hold data", ["thread_id"]
proc updateNimRuntimeInfoThread() =
try:
let threadID = getThreadId()
when declared(getTotalMem):
nim_gc_mem_bytes.set(getTotalMem().float64, labelValues = @[$threadID], doUpdateSystemMetrics = false)
when declared(getOccupiedMem):
nim_gc_mem_occupied_bytes.set(getOccupiedMem().float64, labelValues = @[$threadID], doUpdateSystemMetrics = false)
# TODO: parse the output of `GC_getStatistics()` for more stats
except CatchableError as e:
printError(e.msg)
threadMetricsUpdateProcs[threadMetricsUpdateProcsIndex] = updateNimRuntimeInfoThread
threadMetricsUpdateProcsIndex += 1