This commit is contained in:
gmega 2024-03-01 13:44:06 -03:00
parent 13d7b384b7
commit b972187054
No known key found for this signature in database
GPG Key ID: FFD8DAF00660270F
11 changed files with 515 additions and 479 deletions

View File

@ -8,8 +8,7 @@ license = "MIT or Apache License 2.0"
skipDirs = @["tests"]
requires "nim >= 1.6.16",
"https://github.com/codex-storage/nim-chronos#feature/profiler-v4",
"metrics >= 0.1.0"
"https://github.com/codex-storage/nim-chronos#feature/profiler-v4", "metrics >= 0.1.0"
task test, "Run tests":
exec "nim c --out:./build/testall -r tests/testall.nim"

View File

@ -1,7 +1,8 @@
import ./[profiler, events]
export Event, ExtendedFutureState, ProfilerState, MetricsTotals,
AggregateMetrics, FutureType, execTimeWithChildren
export
Event, ExtendedFutureState, ProfilerState, MetricsTotals, AggregateMetrics,
FutureType, execTimeWithChildren
var profilerInstance {.threadvar.}: ProfilerState

View File

@ -80,7 +80,8 @@ when defined(metrics):
clock: Clock,
sampleInterval: times.Duration,
k: int = 10,
): ChronosProfilerInfo = ChronosProfilerInfo(
): ChronosProfilerInfo =
ChronosProfilerInfo(
sampler: sampler,
clock: clock,
k: k,
@ -95,40 +96,39 @@ when defined(metrics):
timestampMillis: int64,
k: int,
): void =
for (i, pair) in enumerate(profilerMetrics):
if i == k:
break
let (location, metrics) = pair
let locationLabels = @[
$(location.procedure),
$(location.file),
$(location.line),
]
let locationLabels = @[$(location.procedure), $(location.file), $(location.line)]
chronos_exec_time_total.set(metrics.execTime.nanoseconds,
labelValues = locationLabels)
chronos_exec_time_with_children_total.set(
metrics.execTimeWithChildren.nanoseconds,
labelValues = locationLabels
chronos_exec_time_total.set(
metrics.execTime.nanoseconds, labelValues = locationLabels
)
chronos_wall_time_total.set(metrics.wallClockTime.nanoseconds,
labelValues = locationLabels)
chronos_exec_time_with_children_total.set(
metrics.execTimeWithChildren.nanoseconds, labelValues = locationLabels
)
chronos_single_exec_time_max.set(metrics.execTimeMax.nanoseconds,
labelValues = locationLabels)
chronos_wall_time_total.set(
metrics.wallClockTime.nanoseconds, labelValues = locationLabels
)
chronos_call_count_total.set(metrics.callCount.int64,
labelValues = locationLabels)
chronos_single_exec_time_max.set(
metrics.execTimeMax.nanoseconds, labelValues = locationLabels
)
chronos_call_count_total.set(
metrics.callCount.int64, labelValues = locationLabels
)
proc collect*(self: ChronosProfilerInfo, force: bool = false): void =
# Calling this method from the wrong thread has happened a lot in the past,
# so this makes sure we're not doing anything funny.
assert threadId() == moduleInitThread, "You cannot call collect() from" &
assert threadId() == moduleInitThread,
"You cannot call collect() from" &
" a thread other than the one that initialized the metricscolletor module"
let now = self.clock()
@ -136,21 +136,21 @@ when defined(metrics):
return
self.collections += 1
var currentMetrics = self.
sampler().
pairs.
toSeq.
var currentMetrics = self
.sampler().pairs.toSeq
.
# We don't scoop metrics with 0 exec time as we have a limited number of
# prometheus slots, and those are less likely to be useful in debugging
# Chronos performance issues.
filter(
proc(pair: FutureMetrics): bool =
pair[1].execTimeWithChildren.nanoseconds > 0
).
sorted(
)
.sorted(
proc(a, b: FutureMetrics): int =
cmp(a[1].execTimeWithChildren, b[1].execTimeWithChildren),
order = SortOrder.Descending
cmp(a[1].execTimeWithChildren, b[1].execTimeWithChildren)
,
order = SortOrder.Descending,
)
self.collectSlowestProcs(currentMetrics, now.toMilliseconds(), self.k)
@ -183,7 +183,9 @@ when defined(metrics):
k = k,
# We want to collect metrics every 5 seconds.
sampleInterval = initDuration(seconds = 5),
clock = proc (): Time = getTime(),
clock = proc(): Time =
getTime()
,
)
enableProfiling(
@ -192,4 +194,3 @@ when defined(metrics):
if e.newState == ExtendedFutureState.Completed:
asyncProfilerInfo.collect()
)

View File

@ -9,15 +9,14 @@ import chronos/[timer, futures, srcloc]
type
ExtendedFutureState* {.pure.} = enum
Pending,
Running,
Paused,
Completed,
Cancelled,
Failed,
Pending
Running
Paused
Completed
Cancelled
Failed
Event* = object
## A timestamped event transition in a `Future` state.
Event* = object ## A timestamped event transition in a `Future` state.
future: FutureBase
newState*: ExtendedFutureState
timestamp*: Moment
@ -33,16 +32,12 @@ proc `futureId`*(self: Event): uint =
self.future.id
proc mkEvent(future: FutureBase, state: ExtendedFutureState): Event =
Event(
future: future,
newState: state,
timestamp: Moment.now(),
)
Event(future: future, newState: state, timestamp: Moment.now())
proc handleBaseFutureEvent*(future: FutureBase,
state: FutureState): void {.nimcall.} =
proc handleBaseFutureEvent*(future: FutureBase, state: FutureState): void {.nimcall.} =
{.cast(gcsafe).}:
let extendedState = case state:
let extendedState =
case state
of FutureState.Pending: ExtendedFutureState.Pending
of FutureState.Completed: ExtendedFutureState.Completed
of FutureState.Cancelled: ExtendedFutureState.Cancelled
@ -51,10 +46,12 @@ proc handleBaseFutureEvent*(future: FutureBase,
if not isNil(handleFutureEvent):
handleFutureEvent(mkEvent(future, extendedState))
proc handleAsyncFutureEvent*(future: FutureBase,
state: AsyncFutureState): void {.nimcall.} =
proc handleAsyncFutureEvent*(
future: FutureBase, state: AsyncFutureState
): void {.nimcall.} =
{.cast(gcsafe).}:
let extendedState = case state:
let extendedState =
case state
of AsyncFutureState.Running: ExtendedFutureState.Running
of AsyncFutureState.Paused: ExtendedFutureState.Paused
@ -73,4 +70,3 @@ proc stopMonitoring*() =
onBaseFutureEvent = nil
onAsyncFutureEvent = nil
handleFutureEvent = nil

View File

@ -14,32 +14,37 @@ type
## Within the scope of the profiler, a source location identifies
## a future type.
AggregateMetrics* = object
## Stores aggregate metrics for a given `FutureType`.
execTime*: Duration ## The total time that `Future`s of a given
AggregateMetrics* = object ## Stores aggregate metrics for a given `FutureType`.
execTime*: Duration
## The total time that `Future`s of a given
## `FutureType` actually ran; i.e., actively
## occupied the event loop thread, summed
## accross all such `Futures`.
execTimeMax*: Duration ## The maximum time that a `Future` of a
execTimeMax*: Duration
## The maximum time that a `Future` of a
## given `FutureType` actually ran; i.e.,
## actively occupied the event loop thread.
childrenExecTime*: Duration ## Total time that the children of `Future`s
childrenExecTime*: Duration
## Total time that the children of `Future`s
## of this `FutureType` actually ran; i.e.,
## actively occupied the event loop thread,
## summed across all such children.
wallClockTime*: Duration ## Total time that the Future was alive;
wallClockTime*: Duration
## Total time that the Future was alive;
## i.e., the time between the Future's
## creation and its completion, summed
## across all runs of this `FutureType`.
stillbornCount*: uint ## Number of futures of this `FutureType`
stillbornCount*: uint
## Number of futures of this `FutureType`
## that were born in a finished state;
## i.e., a `FutureState` that is not Pending.
callCount*: uint ## Total number of distinct `Future`s observed
callCount*: uint
## Total number of distinct `Future`s observed
## for this `FutureType`.
PartialMetrics = object
@ -63,7 +68,8 @@ type
proc `execTimeWithChildren`*(self: AggregateMetrics): Duration =
self.execTime + self.childrenExecTime
proc push(self: var seq[uint], value: uint): void = self.add(value)
proc push(self: var seq[uint], value: uint): void =
self.add(value)
proc pop(self: var seq[uint]): uint =
let value = self[^1]
@ -71,7 +77,10 @@ proc pop(self: var seq[uint]): uint =
value
proc peek(self: var seq[uint]): Option[uint] =
if self.len == 0: none(uint) else: self[^1].some
if self.len == 0:
none(uint)
else:
self[^1].some
proc `$`(location: SrcLoc): string =
$location.procedure & "[" & $location.file & ":" & $location.line & "]"
@ -79,10 +88,8 @@ proc `$`(location: SrcLoc): string =
proc futureCreated(self: var ProfilerState, event: Event): void =
assert not self.partials.hasKey(event.futureId), $event.location
self.partials[event.futureId] = PartialMetrics(
created: event.timestamp,
state: Pending,
)
self.partials[event.futureId] =
PartialMetrics(created: event.timestamp, state: Pending)
proc bindParent(self: var ProfilerState, metrics: ptr PartialMetrics): void =
let current = self.callStack.peek()
@ -152,15 +159,23 @@ proc futureCompleted(self: var ProfilerState, event: Event): void =
self.partials.del(event.futureId)
proc processEvent*(self: var ProfilerState, event: Event): void {.nimcall, gcsafe, raises: []} =
case event.newState:
of Pending: self.futureCreated(event)
of Running: self.futureRunning(event)
of Paused: self.futurePaused(event)
proc processEvent*(
self: var ProfilerState, event: Event
): void {.nimcall, gcsafe, raises: [].} =
case event.newState
of Pending:
self.futureCreated(event)
of Running:
self.futureRunning(event)
of Paused:
self.futurePaused(event)
# Completion, failure and cancellation are currently handled the same way.
of Completed: self.futureCompleted(event)
of Failed: self.futureCompleted(event)
of Cancelled: self.futureCompleted(event)
of Completed:
self.futureCompleted(event)
of Failed:
self.futureCompleted(event)
of Cancelled:
self.futureCompleted(event)
proc processAllEvents*(self: var ProfilerState, events: seq[Event]): void =
for event in events:

View File

@ -7,7 +7,6 @@ import ../chroprof/events
import ./utils
suite "event ordering expectations":
setup:
startRecording()
@ -15,20 +14,19 @@ suite "event ordering expectations":
stopRecording()
test "should emit correct events for a simple future":
proc simple() {.async.} =
os.sleep(1)
waitFor simple()
check recording == @[
check recording ==
@[
SimpleEvent(state: Pending, procedure: "simple"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "simple"),
SimpleEvent(state: Completed, procedure: "simple"),
]
test "should emit correct events when a single child runs as part of the parent":
proc withChildren() {.async.} =
recordSegment("segment 1")
await sleepAsync(10.milliseconds)
@ -36,11 +34,14 @@ suite "event ordering expectations":
waitFor withChildren()
check recording == @[
check recording ==
@[
SimpleEvent(state: Pending, procedure: "withChildren"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "withChildren"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "segment 1"),
SimpleEvent(state: ExtendedFutureState.Pending, procedure: "chronos.sleepAsync(Duration)"),
SimpleEvent(
state: ExtendedFutureState.Pending, procedure: "chronos.sleepAsync(Duration)"
),
SimpleEvent(state: Paused, procedure: "withChildren"),
SimpleEvent(state: Completed, procedure: "chronos.sleepAsync(Duration)"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "withChildren"),
@ -68,7 +69,8 @@ suite "event ordering expectations":
waitFor withChildren()
check recording == @[
check recording ==
@[
# First iteration of parent and each child
SimpleEvent(state: Pending, procedure: "withChildren"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "withChildren"),
@ -79,18 +81,28 @@ suite "event ordering expectations":
SimpleEvent(state: ExtendedFutureState.Pending, procedure: "child2"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "child2"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "segment 21"),
SimpleEvent(state: ExtendedFutureState.Pending, procedure: "chronos.sleepAsync(Duration)"),
SimpleEvent(
state: ExtendedFutureState.Pending, procedure: "chronos.sleepAsync(Duration)"
),
SimpleEvent(state: ExtendedFutureState.Paused, procedure: "child2"),
SimpleEvent(state: ExtendedFutureState.Paused, procedure: "child1"),
SimpleEvent(state: ExtendedFutureState.Paused, procedure: "withChildren"),
# Second iteration of child2
SimpleEvent(state: ExtendedFutureState.Completed, procedure: "chronos.sleepAsync(Duration)"),
SimpleEvent(
state: ExtendedFutureState.Completed,
procedure: "chronos.sleepAsync(Duration)",
),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "child2"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "segment 22"),
SimpleEvent(state: ExtendedFutureState.Pending, procedure: "chronos.sleepAsync(Duration)"),
SimpleEvent(
state: ExtendedFutureState.Pending, procedure: "chronos.sleepAsync(Duration)"
),
SimpleEvent(state: ExtendedFutureState.Paused, procedure: "child2"),
SimpleEvent(state: ExtendedFutureState.Completed, procedure: "chronos.sleepAsync(Duration)"),
SimpleEvent(
state: ExtendedFutureState.Completed,
procedure: "chronos.sleepAsync(Duration)",
),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "child2"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "segment 23"),
SimpleEvent(state: ExtendedFutureState.Completed, procedure: "child2"),
@ -118,13 +130,19 @@ suite "event ordering expectations":
waitFor withFinally()
check recording == @[
check recording ==
@[
SimpleEvent(state: Pending, procedure: "withFinally"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "withFinally"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "segment 1"),
SimpleEvent(state: ExtendedFutureState.Pending, procedure: "chronos.sleepAsync(Duration)"),
SimpleEvent(
state: ExtendedFutureState.Pending, procedure: "chronos.sleepAsync(Duration)"
),
SimpleEvent(state: ExtendedFutureState.Paused, procedure: "withFinally"),
SimpleEvent(state: ExtendedFutureState.Completed, procedure: "chronos.sleepAsync(Duration)"),
SimpleEvent(
state: ExtendedFutureState.Completed,
procedure: "chronos.sleepAsync(Duration)",
),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "withFinally"),
SimpleEvent(state: ExtendedFutureState.Running, procedure: "segment 2"),
SimpleEvent(state: ExtendedFutureState.Completed, procedure: "withFinally"),

View File

@ -8,8 +8,8 @@ import chronos/[srcloc, timer]
import ../chroprof/[api, collector]
suite "metrics collector":
var locations = @[
var locations =
@[
SrcLoc(procedure: "start", file: "discovery.nim", line: 174),
SrcLoc(procedure: "start", file: "discovery.nim", line: 192),
SrcLoc(procedure: "query", file: "manager.nim", line: 323),
@ -23,36 +23,36 @@ suite "metrics collector":
execTimeMax: timer.nanoseconds(80062),
childrenExecTime: timer.nanoseconds(52044),
wallClockTime: timer.nanoseconds(174567),
callCount: 1
callCount: 1,
),
locations[1]: AggregateMetrics(
execTime: timer.nanoseconds(91660),
execTimeMax: timer.nanoseconds(81660),
childrenExecTime: timer.nanoseconds(52495),
wallClockTime: timer.nanoseconds(72941),
callCount: 1
callCount: 1,
),
locations[2]: AggregateMetrics(
execTime: timer.nanoseconds(60529),
execTimeMax: timer.nanoseconds(60529),
childrenExecTime: timer.nanoseconds(9689),
wallClockTime: timer.nanoseconds(60784),
callCount: 1
callCount: 1,
),
locations[3]: AggregateMetrics(
execTime: timer.nanoseconds(60645),
execTimeMax: timer.nanoseconds(41257),
childrenExecTime: timer.nanoseconds(72934),
wallClockTime: timer.nanoseconds(60813),
callCount: 3
callCount: 3,
),
locations[4]: AggregateMetrics(
execTime: timer.nanoseconds(0),
execTimeMax: timer.nanoseconds(0),
childrenExecTime: timer.nanoseconds(0),
wallClockTime: timer.nanoseconds(60813),
callCount: 3
)
callCount: 3,
),
}.toTable
var wallTime = getTime()
@ -61,8 +61,12 @@ suite "metrics collector":
proc setupCollector(k: int = high(int)): void =
collector = ChronosProfilerInfo.newCollector(
sampler = proc (): MetricsTotals = sample,
clock = proc (): Time = wallTime,
sampler = proc(): MetricsTotals =
sample
,
clock = proc(): Time =
wallTime
,
sampleInterval = times.initDuration(minutes = 5),
k = k,
)
@ -74,16 +78,20 @@ suite "metrics collector":
setupCollector(k = 3)
check chronos_exec_time_with_children_total.value(
labelValuesParam = @["start", "discovery.nim", "192"]) == 144155
labelValuesParam = @["start", "discovery.nim", "192"]
) == 144155
check chronos_exec_time_with_children_total.value(
labelValuesParam = @["start", "discovery.nim", "174"]) == 142106
labelValuesParam = @["start", "discovery.nim", "174"]
) == 142106
check chronos_exec_time_with_children_total.value(
labelValuesParam = @["update", "sqliteds.nim", "107"]) == 133579
labelValuesParam = @["update", "sqliteds.nim", "107"]
) == 133579
# This is out of the top-k slowest, so should not have been recorded.
expect system.KeyError:
discard chronos_exec_time_with_children_total.value(
labelValuesParam = @["query", "manager.nim", "323"])
labelValuesParam = @["query", "manager.nim", "323"]
)
test "should not collect metrics again unless enough time has elapsed from last collection":
setupCollector()
@ -101,5 +109,5 @@ suite "metrics collector":
setupCollector()
expect system.KeyError:
discard chronos_exec_time_total.value(
labelValuesParam = @["idle", "idle.nim", "100"])
discard
chronos_exec_time_total.value(labelValuesParam = @["idle", "idle.nim", "100"])

View File

@ -9,7 +9,6 @@ import ../chroprof/profiler
import ./utils
suite "profiler metrics":
setup:
startRecording()
@ -35,7 +34,7 @@ suite "profiler metrics":
check simpleMetrics.wallClockTime == 50.milliseconds
test "should compute correct times for a simple non-blocking future":
proc simple {.async.} =
proc simple() {.async.} =
advanceTime(10.milliseconds)
await advanceTimeAsync(50.milliseconds)
advanceTime(10.milliseconds)
@ -49,7 +48,7 @@ suite "profiler metrics":
check simpleMetrics.wallClockTime == 70.milliseconds
test "should compute correct times for a non-blocking future with multiple pauses":
proc simple {.async.} =
proc simple() {.async.} =
advanceTime(10.milliseconds)
await advanceTimeAsync(50.milliseconds)
advanceTime(10.milliseconds)
@ -211,7 +210,8 @@ suite "profiler metrics":
check childMetrics.childrenExecTime == ZeroDuration
test "should compute the correct number of times a proc gets called":
proc child() {.async.} = discard
proc child() {.async.} =
discard
proc parent() {.async.} =
for i in 1 .. 10:
@ -270,6 +270,7 @@ suite "profiler metrics":
let stillborns = metrics.pairs.toSeq.map(
proc(item: (SrcLoc, AggregateMetrics)): uint =
item[1].stillbornCount).sum
item[1].stillbornCount
).sum
check stillborns == 2

View File

@ -1,8 +1,7 @@
import chronos
import ../chroprof/[api, events, profiler]
type
SimpleEvent* = object
type SimpleEvent* = object
procedure*: string
state*: ExtendedFutureState
@ -16,7 +15,8 @@ var fakeTime*: Moment = Moment.now()
proc recordEvent(event: Event) {.nimcall, gcsafe, raises: [].} =
{.cast(gcsafe).}:
recording.add(
SimpleEvent(procedure: $event.location.procedure, state: event.newState))
SimpleEvent(procedure: $event.location.procedure, state: event.newState)
)
var timeShifted = event
timeShifted.timestamp = fakeTime
@ -25,10 +25,7 @@ proc recordEvent(event: Event) {.nimcall, gcsafe, raises: [].} =
proc recordSegment*(segment: string) =
{.cast(gcsafe).}:
recording.add(SimpleEvent(
procedure: segment,
state: ExtendedFutureState.Running
))
recording.add(SimpleEvent(procedure: segment, state: ExtendedFutureState.Running))
proc stopRecording*(): void =
recording = @[]
@ -63,7 +60,7 @@ proc advanceTimeAsync*(duration: Duration): Future[void] =
# The actual value for the timer is irrelevant, the important thing is that
# this causes the parent to pause before we advance time.
timer = setTimer(Moment.fromNow(10.milliseconds),
completion, cast[pointer](retFuture))
timer =
setTimer(Moment.fromNow(10.milliseconds), completion, cast[pointer](retFuture))
return retFuture