diff --git a/.gitignore b/.gitignore index 76ef5c3..0607c1c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ nimble.develop nimble.paths build + +.vscode diff --git a/chroprof/config.nim b/chroprof/config.nim new file mode 100644 index 0000000..4eb1049 --- /dev/null +++ b/chroprof/config.nim @@ -0,0 +1 @@ +const chroprofDebug* {.booldefine.}: bool = false diff --git a/chroprof/events.nim b/chroprof/events.nim index 9fceccc..5daa1d3 100644 --- a/chroprof/events.nim +++ b/chroprof/events.nim @@ -23,6 +23,11 @@ type EventCallback* = proc(e: Event) {.nimcall, gcsafe, raises: [].} +const FinishStates* = [ + ExtendedFutureState.Completed, ExtendedFutureState.Cancelled, + ExtendedFutureState.Failed, +] + var handleFutureEvent {.threadvar.}: EventCallback proc `location`*(self: Event): SrcLoc = diff --git a/chroprof/profiler.nim b/chroprof/profiler.nim index c04f3fa..04315c3 100644 --- a/chroprof/profiler.nim +++ b/chroprof/profiler.nim @@ -5,7 +5,9 @@ import std/[tables, options, sets] import chronos/[timer, srcloc] +import ./config import ./events +import ./utils export timer, tables, sets, srcloc @@ -48,16 +50,23 @@ type ## for this `FutureType`. PartialMetrics = object - state*: ExtendedFutureState + ## Tracks `PartialMetric`s for a single run of a given `Future`. `PartialMetrics` + ## may not be complete until the `Future` and its children have reached a + ## finish state. created*: Moment lastStarted*: Moment timeToFirstPause*: Duration partialExecTime*: Duration partialChildrenExecTime*: Duration partialChildrenExecOverlap*: Duration - parent*: Option[uint] + wallclockTime: Duration pauses*: uint + futureType: FutureType + state*: ExtendedFutureState + parent*: Option[uint] + liveChildren: uint + MetricsTotals* = Table[FutureType, AggregateMetrics] ProfilerState* = object @@ -68,36 +77,32 @@ type proc `execTimeWithChildren`*(self: AggregateMetrics): Duration = self.execTime + self.childrenExecTime -proc push(self: var seq[uint], value: uint): void = - self.add(value) - -proc pop(self: var seq[uint]): uint = - let value = self[^1] - self.setLen(self.len - 1) - value - -proc peek(self: var seq[uint]): Option[uint] = - if self.len == 0: - none(uint) - else: - self[^1].some - -proc `$`(location: SrcLoc): string = - $location.procedure & "[" & $location.file & ":" & $location.line & "]" - proc futureCreated(self: var ProfilerState, event: Event): void = assert not self.partials.hasKey(event.futureId), $event.location self.partials[event.futureId] = - PartialMetrics(created: event.timestamp, state: Pending) + PartialMetrics(created: event.timestamp, state: Pending, futureType: event.location) proc bindParent(self: var ProfilerState, metrics: ptr PartialMetrics): void = let current = self.callStack.peek() if current.isNone: + when chroprofDebug: + echo "No parent for ", $metrics.futureType.procedure, ", ", $metrics.state return if metrics.parent.isSome: assert metrics.parent.get == current.get + + self.partials.withValue(current.get, parentMetrics): + parentMetrics.liveChildren += 1 + when chroprofDebug: + echo "SET_PARENT: Parent of ", + $metrics.futureType.procedure, + " is ", + $parentMetrics.futureType.procedure, + ", ", + $metrics.state + metrics.parent = current proc futureRunning(self: var ProfilerState, event: Event): void = @@ -128,40 +133,88 @@ proc futurePaused(self: var ProfilerState, event: Event): void = metrics.pauses += 1 metrics.state = Paused -proc futureCompleted(self: var ProfilerState, event: Event): void = - let location = event.location - if not self.metrics.hasKey(location): - self.metrics[location] = AggregateMetrics() +proc aggregatePartial( + self: var ProfilerState, metrics: ptr PartialMetrics, futureId: uint +): void = + ## Aggregates partial execution metrics into the total metrics for the given + ## `FutureType`. - self.metrics.withValue(location, aggMetrics): - if not self.partials.hasKey(event.futureId): - # Stillborn futures are those born in a finish state. We count those cause - # they may also be a byproduct of a bug. - aggMetrics.stillbornCount.inc() + self.metrics.withValue(metrics.futureType, aggMetrics): + let execTime = metrics.partialExecTime - metrics.partialChildrenExecOverlap + + aggMetrics.callCount.inc() + aggMetrics.execTime += execTime + aggMetrics.execTimeMax = max(aggMetrics.execTimeMax, execTime) + aggMetrics.childrenExecTime += metrics.partialChildrenExecTime + aggMetrics.wallClockTime += metrics.wallclockTime + + if metrics.parent.isSome: + self.partials.withValue(metrics.parent.get, parentMetrics): + when chroprofDebug: + echo $metrics.futureType.procedure, + ": add <<", + metrics.timeToFirstPause, + ">> overlap and <<", + metrics.partialExecTime, + ">> child exec time to parent (", + parentMetrics.futureType.procedure, + ")" + + parentMetrics.partialChildrenExecTime += metrics.partialExecTime + parentMetrics.partialChildrenExecOverlap += metrics.timeToFirstPause + parentMetrics.liveChildren -= 1 + + if parentMetrics.state in FinishStates: + if parentMetrics.liveChildren == 0: + when chroprofDebug: + echo "Perfom deferred aggregation of completed parent with no live children: ", + $parentMetrics.futureType.procedure + self.aggregatePartial(parentMetrics, metrics.parent.get) + else: + when chroprofDebug: + echo "Parent ", + $parentMetrics.futureType.procedure, + " still has ", + parentMetrics.liveChildren, + " live children" + + self.partials.del(futureId) + +proc countStillborn(self: var ProfilerState, futureType: FutureType): void = + self.metrics.withValue(futureType, aggMetrics): + aggMetrics.stillbornCount.inc() + +proc futureCompleted(self: var ProfilerState, event: Event): void = + let futureType = event.location + let futureId = event.futureId + + if not self.metrics.hasKey(futureType): + self.metrics[futureType] = AggregateMetrics() + + if not self.partials.hasKey(futureId): + self.countStillborn(futureType) + return + + self.partials.withValue(futureId, partial): + if partial.state == Running: + self.futurePaused(event) + partial.state = event.newState + + partial.wallclockTime = event.timestamp - partial.created + + # Future still have live children, don't aggregate yet. + if partial.liveChildren > 0: return - self.partials.withValue(event.futureId, metrics): - if metrics.state == Running: - self.futurePaused(event) - - let execTime = metrics.partialExecTime - metrics.partialChildrenExecOverlap - - aggMetrics.callCount.inc() - aggMetrics.execTime += execTime - aggMetrics.execTimeMax = max(aggMetrics.execTimeMax, execTime) - aggMetrics.childrenExecTime += metrics.partialChildrenExecTime - aggMetrics.wallClockTime += event.timestamp - metrics.created - - if metrics.parent.isSome: - self.partials.withValue(metrics.parent.get, parentMetrics): - parentMetrics.partialChildrenExecTime += metrics.partialExecTime - parentMetrics.partialChildrenExecOverlap += metrics.timeToFirstPause - - self.partials.del(event.futureId) + self.aggregatePartial(partial, futureId) proc processEvent*( self: var ProfilerState, event: Event ): void {.nimcall, gcsafe, raises: [].} = + when chroprofDebug: + echo "EVENT:", + $event.location.procedure, ", ", event.newState, ", ", event.timestamp + case event.newState of Pending: self.futureCreated(event) diff --git a/chroprof/utils.nim b/chroprof/utils.nim new file mode 100644 index 0000000..83b0b4a --- /dev/null +++ b/chroprof/utils.nim @@ -0,0 +1,19 @@ +import std/options +import chronos/srcloc + +proc push*(self: var seq[uint], value: uint): void = + self.add(value) + +proc pop*(self: var seq[uint]): uint = + let value = self[^1] + self.setLen(self.len - 1) + value + +proc peek*(self: var seq[uint]): Option[uint] = + if self.len == 0: + none(uint) + else: + self[^1].some + +proc `$`*(location: SrcLoc): string = + $location.procedure & "[" & $location.file & ":" & $location.line & "]" diff --git a/tests/testprofiler.nim b/tests/testprofiler.nim index f88fb1f..90b1406 100644 --- a/tests/testprofiler.nim +++ b/tests/testprofiler.nim @@ -274,3 +274,31 @@ suite "profiler metrics": ).sum check stillborns == 2 + + test "should properly account for children's execution time even when those finish after the parent": + let barrier = newFuture[void]("barrier") + + proc child() {.async.} = + await barrier + advanceTime(10.milliseconds) + + proc parent(): Future[Future[void]] {.async.} = + advanceTime(10.milliseconds) + return child() + + let childFut = waitFor parent() + barrier.complete() + waitFor childFut + + var metrics = recordedMetrics() + let parentMetrics = metrics.forProc("parent") + let childMetrics = metrics.forProc("child") + + check parentMetrics.execTime == 10.milliseconds + check parentMetrics.childrenExecTime == 10.milliseconds + check parentMetrics.wallClockTime == 10.milliseconds + + echo "EXE TIME ", $childMetrics.execTime + check childMetrics.execTime == 10.milliseconds + check childMetrics.wallClockTime == 10.milliseconds + check childMetrics.childrenExecTime == ZeroDuration diff --git a/tests/utils.nim b/tests/utils.nim index 3078aba..5cd7bd1 100644 --- a/tests/utils.nim +++ b/tests/utils.nim @@ -10,7 +10,7 @@ type SimpleEvent* = object # global vars means we can't really do much better for now. var recording*: seq[SimpleEvent] var rawRecording*: seq[Event] -var fakeTime*: Moment = Moment.now() +var fakeTime*: Moment = Moment.init(0, 1.milliseconds) proc recordEvent(event: Event) {.nimcall, gcsafe, raises: [].} = {.cast(gcsafe).}: