Revert profiling to Timbre v4.4.0 state

Specifically:
  - Reverting some experimental new features (`profiled`), etc.
  - Restoring old multi-threaded (dynamic) profiling support

The new thread-local performance improvements have instead been
moved to Tufte: https://github.com/ptaoussanis/tufte
This commit is contained in:
Peter Taoussanis 2016-07-11 15:06:47 +07:00
parent d6f10795a6
commit 01bbf48d6f
1 changed files with 223 additions and 264 deletions

View File

@ -1,272 +1,231 @@
(ns taoensso.timbre.profiling
"Simple logging profiler for Timbre. Highly optimized; supports
sampled profiling in production."
{:author "Peter Taoussanis (@ptaoussanis)"}
(:require [taoensso.encore :as enc :refer (qb)]
[taoensso.timbre :as timbre])
(:import [java.util HashMap LinkedList]))
"Logging profiler for Timbre, adapted from clojure.contrib.profile."
{:author "Peter Taoussanis"}
(:require [taoensso.encore :as enc]
[taoensso.timbre :as timbre]))
;;;; TODO
;;;; TODO ns could use some housekeeping
;; * Boxed math optimizations
;; * Possible porting to .cljx (any point?)
;; * Support for explicit `config` args?
;; * Consider a .cljx port? Any demand for this kind of cljs profiling?
;; * Support for real level+ns based elision (zero *pdata* check cost, etc.)?
;; - E.g. perhaps `p` forms could take a logging level?
;; * General housekeeping, perf work
;;;; Utils
;; Note that we only support *compile-time* ids
(defn- qualified-kw [ns id] (if (enc/qualified-keyword? id) id (keyword (str ns) (name id))))
(comment (qualified-kw *ns* "foo"))
(defmacro fq-keyword "Returns namespaced keyword for given id."
[id] `(if (and (keyword? ~id) (namespace ~id)) ~id
(keyword ~(str *ns*) (name ~id))))
(def ^:private elide-profiling?
"Completely elide all profiling? In particular, eliminates proxy checks.
TODO Temp, until we have a better elision strategy."
(enc/read-sys-val "TIMBRE_ELIDE_PROFILING"))
(comment (map #(fq-keyword %) ["foo" :foo :foo/bar]))
;; TODO May be preferable if our `p` forms could actually take a logging level?
;; Need to think about this. Might just be time to refactor this entire ns + design
(def ^:private elide-profiling? "Experimental"
(when-let [s (System/getenv "TIMBRE_ELIDE_PROFILING")] (enc/read-edn s)))
;;;;
;; We establish one of these (thread local) to enable profiling.
(deftype PData [m-times m-stats]) ; [?{<id> <LinkedList>} ?{<id> <interim-stats>}]
(defmacro -new-pdata [] `(PData. nil nil))
(def ^:dynamic *pdata*
"{::pid {:times [t1 t2 ...] ; Times awaiting merge into stats
:ntimes _ ; (count times)
:stats {} ; Cumulative stats
}}"
nil)
;; This is substantially faster than a ^:dynamic volatile:
(def -pdata-proxy
(let [^ThreadLocal proxy (proxy [ThreadLocal] [])]
(fn
([] (.get proxy)) ; nnil iff profiling enabled
([new-val] (.set proxy new-val) new-val))))
(declare ^:private times->stats)
(defn -capture-time!
([id t-elapsed] ; Just for dev/debugging
(-capture-time! (-pdata-proxy) id t-elapsed))
([^PData pdata id t-elapsed] ; Common case
(let [m-times (.m-times pdata)
m-stats (.m-stats pdata)]
(if-let [^LinkedList times (get m-times id)]
(if (== (.size times) #_20 2000000) ; Rare in real-world use
;; Compact: merge interim stats to help prevent OOMs
(let [stats (times->stats times (get m-stats id))
times (LinkedList.)]
(.add times t-elapsed)
(-pdata-proxy
(PData. (assoc m-times id times)
(assoc m-stats id stats))))
;; Common case
(.add times t-elapsed))
;; Init case
(let [times (LinkedList.)]
(.add times t-elapsed)
(-pdata-proxy (PData. (assoc m-times id times)
m-stats))))
nil)))
;; Just for dev/debugging
(defmacro -with-pdata [& body]
`(try
(-pdata-proxy (-new-pdata))
(do ~@body)
(finally (-pdata-proxy nil))))
(comment
(-with-pdata (qb 1e6 (-capture-time! :foo 1000))) ; 70.84
(-with-pdata
(dotimes [_ 20] (-capture-time! :foo 100000))
(.m-times ^PData (-pdata-proxy))))
(defn- times->stats [^LinkedList times ?base-stats]
(let [ntimes (.size times)
times (into [] times) ; Faster to reduce
ts-count (if (zero? ntimes) 1 ntimes)
ts-time (reduce (fn [^long acc ^long in] (+ acc in)) times)
ts-mean (/ (double ts-time) (double ts-count))
ts-mad-sum (reduce (fn [^long acc ^long in] (+ acc (Math/abs (- in ts-mean)))) 0 times)
ts-min (reduce (fn [^long acc ^long in] (if (< in acc) in acc)) Long/MAX_VALUE times)
ts-max (reduce (fn [^long acc ^long in] (if (> in acc) in acc)) 0 times)]
(if-let [stats ?base-stats] ; Merge over previous stats
(let [s-count (+ ^long (get stats :count) ts-count)
s-time (+ ^long (get stats :time) ts-time)
s-mean (/ (double s-time) (double s-count))
s-mad-sum (+ ^long (get stats :mad-sum) ts-mad-sum)
s-mad (/ (double s-mad-sum) (double s-count))
s0-min (get stats :min)
s0-max (get stats :max)]
;; Batched "online" MAD calculation here is >= the standard
;; Knuth/Welford method, Ref. http://goo.gl/QLSfOc,
;; http://goo.gl/mx5eSK.
{:count s-count
:time s-time
:mean s-mean
:mad-sum s-mad-sum
:mad s-mad
:min (if (< ^long s0-min ^long ts-min) s0-min ts-min)
:max (if (> ^long s0-max ^long ts-max) s0-max ts-max)})
{:count ts-count
:time ts-time
:mean ts-mean
:mad-sum ts-mad-sum
:mad (/ (double ts-mad-sum) (double ts-count))
:min ts-min
:max ts-max})))
(defn -compile-final-stats! "Returns {<id> <stats>}"
[clock-time]
(let [^PData pdata (-pdata-proxy)
m-times (.m-times pdata)
m-stats (.m-stats pdata)]
(reduce-kv
(fn [m id times]
(assoc m id (times->stats times (get m-stats id))))
{:clock-time clock-time} m-times)))
(comment
(qb 1e5
(-with-pdata
(-capture-time! :foo 10)
(-capture-time! :foo 20)
(-capture-time! :foo 30)
(-capture-time! :foo 10)
(-compile-final-stats! 0))) ; 121.83
)
;;;;
(defn- perc [n d] (Math/round (/ (double n) (double d) 0.01)))
(comment (perc 14 24))
(defn- ft [nanosecs]
(let [ns (long nanosecs)] ; Truncate any fractionals
(cond
(>= ns 1000000000) (str (enc/round2 (/ ns 1000000000)) "s") ; 1e9
(>= ns 1000000) (str (enc/round2 (/ ns 1000000)) "ms") ; 1e6
(>= ns 1000) (str (enc/round2 (/ ns 1000)) "μs") ; 1e3
:else (str ns "ns"))))
(defn -format-stats
([stats ] (-format-stats stats :time))
([stats sort-field]
(let [clock-time (get stats :clock-time)
stats (dissoc stats :clock-time)
^long accounted (reduce-kv (fn [^long acc k v] (+ acc ^long (:time v))) 0 stats)
^long max-id-width
(reduce-kv
(fn [^long acc k v]
(let [c (count (str k))]
(if (> c acc) c acc)))
#=(count "Accounted Time")
stats)
pattern (str "%" max-id-width "s %,11d %9s %10s %9s %9s %7d %1s%n")
s-pattern (str "%" max-id-width "s %11s %9s %10s %9s %9s %7s %1s%n")
sorted-stat-ids
(sort-by
(fn [id] (get-in stats [id sort-field]))
enc/rcompare
(keys stats))]
(with-out-str
(printf s-pattern "Id" "nCalls" "Min" "Max" "MAD" "Mean" "Time%" "Time")
(enc/run!
(fn [id]
(let [{:keys [count min max mean mad time]} (get stats id)]
(printf pattern id count (ft min) (ft max) (ft mad)
(ft mean) (perc time clock-time) (ft time))))
sorted-stat-ids)
(printf s-pattern "Clock Time" "" "" "" "" "" 100 (ft clock-time))
(printf s-pattern "Accounted Time" "" "" "" "" ""
(perc accounted clock-time) (ft accounted))))))
;;;;
(declare capture-time! merge-times>stats!)
(defmacro pspy
"Profile spy. When thread-local profiling is enabled, records
execution time of named body. Always returns the body's result."
"Profile spy. When in the context of a *pdata* binding, records execution time
of named body. Always returns the body's result."
;; Note: do NOT implement as `(pspy* ~id (fn [] ~@body))`. The fn wrapping
;; can cause unnecessary lazy seq head retention, Ref. http://goo.gl/42Vxph.
[id & body]
(let [id (qualified-kw *ns* id)]
(if elide-profiling?
`(do ~@body)
`(let [pdata# (-pdata-proxy)]
(if pdata#
(let [t0# (System/nanoTime)
result# (do ~@body)
t1# (System/nanoTime)]
(-capture-time! pdata# ~id (- t1# t0#))
result#)
(do ~@body))))))
`(if-not *pdata*
(do ~@body)
(let [id# (fq-keyword ~id)
t0# (System/nanoTime)]
(try (do ~@body)
(finally (capture-time! id# (- (System/nanoTime) t0#))))))))
(defmacro p [id & body] `(pspy ~id ~@body)) ; Alias
(comment (macroexpand '(p :foo (+ 4 2))))
(defmacro profiled
"Experimental, subject to change!
Low-level profiling util. Executes expr with thread-local profiling
enabled, then executes body with `[<stats> <expr-result>]` binding, e.g:
(profiled \"foo\" [stats result] (do (println stats) result))"
[expr-to-profile params & body]
(assert (vector? params))
(assert (= 2 (count params)))
(let [[stats result] params]
`(try
(-pdata-proxy (-new-pdata))
(let [t0# (System/nanoTime)
~result ~expr-to-profile
t1# (System/nanoTime)
~stats (-compile-final-stats! (- t1# t0#))]
(do ~@body))
(finally (-pdata-proxy nil)))))
(def pspy*
(if elide-profiling?
(fn [id f] (f))
(fn [id f]
(if-not *pdata*
(f)
(let [id (fq-keyword id)
t0 (System/nanoTime)]
(try (f)
(finally (capture-time! id (- (System/nanoTime) t0)))))))))
(comment (profiled (p :foo "foo") [stats result] [stats result]))
(def p* pspy*) ; Alias
(comment
(binding [*pdata* {}])
(time (dotimes [_ 1000000])) ; ~3ms
(time (dotimes [_ 1000000] (pspy :foo))) ; ~65ms (^:dynamic bound >= once!)
)
(declare ^:private format-stats)
(defmacro with-pdata [level & body]
`(if-not (timbre/log? ~level ~(str *ns*))
{:result (do ~@body)}
(binding [*pdata* (atom {})]
{:result (pspy ::clock-time ~@body)
:stats (merge-times>stats!)})))
(defmacro profile
"When logging is enabled, executes named body with thread-local profiling
enabled and logs profiling stats. Always returns body's result."
[level id & body]
(let [id (qualified-kw *ns* id)]
(if elide-profiling?
`(do ~@body)
`(if (timbre/may-log? ~level ~(str *ns*)) ; Runtime check
(profiled (do ~@body) [stats# result#]
(let [stats-str# (-format-stats stats#)]
(timbre/log! ~level :p
["Profiling: " ~id "\n" stats-str#]
{:?base-data
{:profile-stats stats#
:profile-stats-str stats-str#}})
result#))
(do ~@body)))))
"When logging is enabled, executes named body with profiling enabled. Body
forms wrapped in (pspy) will be timed and time stats logged. Always returns
body's result.
(comment (profile :info :foo "foo"))
Note that logging appenders will receive both a formatted profiling string AND
the raw profiling stats under a special :profiling-stats key (useful for
queryable db logging)."
[level id & body]
`(let [{result# :result stats# :stats} (with-pdata ~level ~@body)]
(when stats#
(timbre/log! ~level :f
["Profiling: %s\n%s" (fq-keyword ~id) (format-stats stats#)]
{:?base-data {:profile-stats stats#}}))
result#))
(defmacro sampling-profile
"Like `profile`, but only enables profiling with given probability."
[level probability id & body]
(assert (<= 0 probability 1) "Probability: 0<=p<=1")
(if elide-profiling?
`(do ~@body)
`(if (< (rand) ~probability)
(profile ~level ~id ~@body)
(do ~@body))))
`(do (assert (<= 0 ~probability 1) "Probability: 0<=p<=1")
(if-not (< (rand) ~probability) (do ~@body)
(profile ~level ~id ~@body))))
;;;; fnp stuff
;;;; Data capturing & aggregation
(defn -fn-sigs [fn-name sigs]
(def ^:private stats-gc-n 111111)
(defn capture-time! [id t-elapsed]
(let [ntimes
(->
(swap! *pdata*
(fn [m]
(let [{:as m-id
:keys [times ntimes]
:or {times [] ntimes 0}} (get m id {})]
(assoc m id
(assoc m-id :times (conj times t-elapsed)
:ntimes (inc ntimes))))))
(get-in [id :ntimes]))]
(when (= ntimes stats-gc-n) ; Merge to reduce memory footprint
;; This is so much slower than `capture-time!` swaps that it gets delayed
;; until after entire profiling call completes!:
;; (future (merge-times>stats! id)) ; Uses binding conveyance
(p :timbre/stats-gc (merge-times>stats! id)))
nil))
(comment
(binding [*pdata* (atom {})]
(capture-time! :foo 100000)
(capture-time! :foo 100000)
*pdata*))
(defn merge-times>stats!
([] ; -> {<pid> <merged-stats>}
(reduce (fn [m pid] (assoc m pid (merge-times>stats! pid)))
{} (keys (or @*pdata* {}))))
([id] ; -> <merged-stats>
(->
(swap! *pdata*
(fn [m]
(let [{:as m-id
:keys [times ntimes stats]
:or {times []
ntimes 0
stats {}}} (get m id {})]
(if (empty? times) m
(let [ts-count (max 1 ntimes)
ts-time (reduce + times)
ts-mean (/ ts-time ts-count)
;; Batched "online" MAD calculation here is >= the standard
;; Knuth/Welford method, Ref. http://goo.gl/QLSfOc,
;; http://goo.gl/mx5eSK.
ts-mad-sum (reduce + (map #(Math/abs (long (- % ts-mean)))
times)) ; Mean absolute deviation
;;
s-count (+ (:count stats 0) ts-count)
s-time (+ (:time stats 0) ts-time)
s-mean (/ s-time s-count)
s-mad-sum (+ (:mad-sum stats 0) ts-mad-sum)
s-mad (/ s-mad-sum s-count)
s-min (apply min (:min stats Double/POSITIVE_INFINITY) times)
s-max (apply max (:max stats 0) times)]
(assoc m id
(assoc m-id
:times []
:ntimes 0
:stats {:count s-count
:min s-min
:max s-max
:mean s-mean
:mad-sum s-mad-sum
:mad s-mad
:time s-time})))))))
(get-in [id :stats]))))
(comment
(binding [*pdata* (atom {})]
(capture-time! :foo 10)
(capture-time! :foo 20)
(merge-times>stats! :foo)
(capture-time! :foo 30)
(merge-times>stats! :foo)
(merge-times>stats! :bar)
(capture-time! :foo 10)
*pdata*))
(defn format-stats [stats & [sort-field]]
(let [clock-time (-> stats ::clock-time :time) ; How long entire profile body took
stats (dissoc stats ::clock-time)
accounted (reduce + (map :time (vals stats)))
max-id-width (apply max (map (comp count str)
(conj (keys stats) "Accounted Time")))
pattern (str "%" max-id-width "s %,11d %9s %10s %9s %9s %7d %1s%n")
s-pattern (str "%" max-id-width "s %11s %9s %10s %9s %9s %7s %1s%n")
perc #(Math/round (/ %1 %2 0.01))
ft (fn [nanosecs]
(let [nanosecs (long nanosecs) ; Truncate any fractional nanosecs
pow #(Math/pow 10 %)
ok-pow? #(>= nanosecs (pow %))
to-pow #(enc/round (/ nanosecs (pow %1)) :round %2)]
(cond (ok-pow? 9) (str (to-pow 9 1) "s")
(ok-pow? 6) (str (to-pow 6 0) "ms")
(ok-pow? 3) (str (to-pow 3 0) "μs")
:else (str nanosecs "ns"))))]
(with-out-str
(printf s-pattern "Id" "nCalls" "Min" "Max" "MAD" "Mean" "Time%" "Time")
(doseq [pid (->> (keys stats)
(sort-by #(- (get-in stats [% (or sort-field :time)]))))]
(let [{:keys [count min max mean mad time]} (stats pid)]
(printf pattern pid count (ft min) (ft max) (ft mad)
(ft mean) (perc time clock-time) (ft time))))
(printf s-pattern "Clock Time" "" "" "" "" "" 100 (ft clock-time))
(printf s-pattern "Accounted Time" "" "" "" "" ""
(perc accounted clock-time) (ft accounted)))))
;;;;
(defn fn-sigs "Implementation detail."
[fn-name sigs]
(let [single-arity? (vector? (first sigs))
sigs (if single-arity? (list sigs) sigs)
get-id (if single-arity?
get-pid (if single-arity?
(fn [fn-name _params] (name fn-name))
(fn [fn-name params] (str (name fn-name) \_ (count params))))
new-sigs
@ -275,8 +234,8 @@
(let [has-prepost-map? (and (map? (first others)) (next others))
[?prepost-map & body] (if has-prepost-map? others (cons nil others))]
(if ?prepost-map
`(~params ~?prepost-map (pspy ~(get-id fn-name params) ~@body))
`(~params (pspy ~(get-id fn-name params) ~@body)))))
`(~params ~?prepost-map (pspy ~(get-pid fn-name params) ~@body))
`(~params (pspy ~(get-pid fn-name params) ~@body)))))
sigs)]
new-sigs))
@ -284,17 +243,19 @@
{:arglists '([name? [params*] prepost-map? body]
[name? ([params*] prepost-map? body)+])}
[& sigs]
(let [[?fn-name sigs] (if (symbol? (first sigs)) [(first sigs) (next sigs)] [nil sigs])
new-sigs (-fn-sigs (or ?fn-name 'anonymous-fn) sigs)]
(let [[?fn-name sigs] (if (symbol? (first sigs))
[(first sigs) (next sigs)]
[nil sigs])
new-sigs (fn-sigs (or ?fn-name 'anonymous-fn) sigs)]
(if ?fn-name
`(fn ~?fn-name ~@new-sigs)
`(fn ~@new-sigs))))
(comment
(-fn-sigs "foo" '([x] (* x x)))
(fn-sigs "foo" '([x] (* x x)))
(macroexpand '(fnp [x] (* x x)))
(macroexpand '(fn [x] (* x x)))
(macroexpand '(fnp bob [x] {:pre [x]} (* x x)))
(macroexpand '(fnp [x] {:pre [x]} (* x x)))
(macroexpand '(fn [x] {:pre [x]} (* x x))))
(defmacro defnp "Like `defn` but wraps fn bodies with `p` macro."
@ -303,7 +264,7 @@
[name doc-string? attr-map? ([params*] prepost-map? body)+ attr-map?])}
[& sigs]
(let [[fn-name sigs] (enc/name-with-attrs (first sigs) (next sigs))
new-sigs (-fn-sigs fn-name sigs)]
new-sigs (fn-sigs fn-name sigs)]
`(defn ~fn-name ~@new-sigs)))
(comment
@ -314,13 +275,6 @@
([x y] (* x y))))
(profile :info :defnp-test (foo 5)))
;;;; Deprecated
(def pspy* "Deprecated" (fn [_id f] (pspy :pspy*/no-id (f))))
(def p* "Deprecated" pspy*)
(comment (profile :info :pspy* (pspy* :foo (fn [] (Thread/sleep 100)))))
;;;;
(comment
@ -334,9 +288,9 @@
(p :10ms (Thread/sleep 10))
"Result"))
(p :hello "Hello, this is a result") ; Falls through (no thread context)
(p :hello "Hello, this is a result") ; Falls through (no *pdata* context)
(defnp my-fn
(defn my-fn
[]
(let [nums (vec (range 1000))]
(+ (p :fast-sleep (Thread/sleep 1) 10)
@ -347,7 +301,12 @@
(p :div (reduce / nums)))))
(profile :info :Arithmetic (dotimes [n 100] (my-fn)))
(profile :info :high-n (dotimes [n 1e5] (p :nil nil))) ; ~20ms
(profile :info :high-n (dotimes [n 1e6] (p :nil nil))) ; ~116ms
(profiled (dotimes [n 1e6] (p :nil nil)) [stats result] [stats result])
(sampling-profile :info 0.5 :sampling-test (p :string "Hello!")))
(profile :info :high-n (dotimes [n 1e6] (p :divs (/ 1 2 3 4 5 6 7 8 9))))
(let [;; MAD = 154.0ms, natural:
;; n->s {0 10 1 100 2 50 3 500 4 8 5 300 6 32 7 433 8 213 9 48}
;; MAD = 236.0ms, pathological:
n->s {0 10 1 11 2 5 3 18 4 7 5 2 6 300 7 400 8 600 9 700}]
(with-redefs [stats-gc-n 3]
(profile :info :high-sigma (dotimes [n 10] (p :sleep (Thread/sleep (n->s n)))))))
(sampling-profile :info 0.2 :sampling-test (p :string "Hello!")))