Revert profiling to Timbre v4.4.0 state

Specifically:
  - Reverting some experimental new features (`profiled`), etc.
  - Restoring old multi-threaded (dynamic) profiling support

The new thread-local performance improvements have instead been
moved to Tufte: https://github.com/ptaoussanis/tufte
This commit is contained in:
Peter Taoussanis 2016-07-11 15:06:47 +07:00
parent d6f10795a6
commit 01bbf48d6f
1 changed files with 223 additions and 264 deletions

View File

@ -1,272 +1,231 @@
(ns taoensso.timbre.profiling (ns taoensso.timbre.profiling
"Simple logging profiler for Timbre. Highly optimized; supports "Logging profiler for Timbre, adapted from clojure.contrib.profile."
sampled profiling in production." {:author "Peter Taoussanis"}
{:author "Peter Taoussanis (@ptaoussanis)"} (:require [taoensso.encore :as enc]
(:require [taoensso.encore :as enc :refer (qb)] [taoensso.timbre :as timbre]))
[taoensso.timbre :as timbre])
(:import [java.util HashMap LinkedList]))
;;;; TODO ;;;; TODO ns could use some housekeeping
;; * Boxed math optimizations
;; * Possible porting to .cljx (any point?)
;; * Support for explicit `config` args? ;; * Support for explicit `config` args?
;; * Consider a .cljx port? Any demand for this kind of cljs profiling? ;; * General housekeeping, perf work
;; * Support for real level+ns based elision (zero *pdata* check cost, etc.)?
;; - E.g. perhaps `p` forms could take a logging level?
;;;; Utils ;;;; Utils
;; Note that we only support *compile-time* ids (defmacro fq-keyword "Returns namespaced keyword for given id."
(defn- qualified-kw [ns id] (if (enc/qualified-keyword? id) id (keyword (str ns) (name id)))) [id] `(if (and (keyword? ~id) (namespace ~id)) ~id
(comment (qualified-kw *ns* "foo")) (keyword ~(str *ns*) (name ~id))))
(def ^:private elide-profiling? (comment (map #(fq-keyword %) ["foo" :foo :foo/bar]))
"Completely elide all profiling? In particular, eliminates proxy checks.
TODO Temp, until we have a better elision strategy." ;; TODO May be preferable if our `p` forms could actually take a logging level?
(enc/read-sys-val "TIMBRE_ELIDE_PROFILING")) ;; Need to think about this. Might just be time to refactor this entire ns + design
(def ^:private elide-profiling? "Experimental"
(when-let [s (System/getenv "TIMBRE_ELIDE_PROFILING")] (enc/read-edn s)))
;;;; ;;;;
;; We establish one of these (thread local) to enable profiling. (def ^:dynamic *pdata*
(deftype PData [m-times m-stats]) ; [?{<id> <LinkedList>} ?{<id> <interim-stats>}] "{::pid {:times [t1 t2 ...] ; Times awaiting merge into stats
(defmacro -new-pdata [] `(PData. nil nil)) :ntimes _ ; (count times)
:stats {} ; Cumulative stats
}}"
nil)
;; This is substantially faster than a ^:dynamic volatile: (declare capture-time! merge-times>stats!)
(def -pdata-proxy
(let [^ThreadLocal proxy (proxy [ThreadLocal] [])]
(fn
([] (.get proxy)) ; nnil iff profiling enabled
([new-val] (.set proxy new-val) new-val))))
(declare ^:private times->stats)
(defn -capture-time!
([id t-elapsed] ; Just for dev/debugging
(-capture-time! (-pdata-proxy) id t-elapsed))
([^PData pdata id t-elapsed] ; Common case
(let [m-times (.m-times pdata)
m-stats (.m-stats pdata)]
(if-let [^LinkedList times (get m-times id)]
(if (== (.size times) #_20 2000000) ; Rare in real-world use
;; Compact: merge interim stats to help prevent OOMs
(let [stats (times->stats times (get m-stats id))
times (LinkedList.)]
(.add times t-elapsed)
(-pdata-proxy
(PData. (assoc m-times id times)
(assoc m-stats id stats))))
;; Common case
(.add times t-elapsed))
;; Init case
(let [times (LinkedList.)]
(.add times t-elapsed)
(-pdata-proxy (PData. (assoc m-times id times)
m-stats))))
nil)))
;; Just for dev/debugging
(defmacro -with-pdata [& body]
`(try
(-pdata-proxy (-new-pdata))
(do ~@body)
(finally (-pdata-proxy nil))))
(comment
(-with-pdata (qb 1e6 (-capture-time! :foo 1000))) ; 70.84
(-with-pdata
(dotimes [_ 20] (-capture-time! :foo 100000))
(.m-times ^PData (-pdata-proxy))))
(defn- times->stats [^LinkedList times ?base-stats]
(let [ntimes (.size times)
times (into [] times) ; Faster to reduce
ts-count (if (zero? ntimes) 1 ntimes)
ts-time (reduce (fn [^long acc ^long in] (+ acc in)) times)
ts-mean (/ (double ts-time) (double ts-count))
ts-mad-sum (reduce (fn [^long acc ^long in] (+ acc (Math/abs (- in ts-mean)))) 0 times)
ts-min (reduce (fn [^long acc ^long in] (if (< in acc) in acc)) Long/MAX_VALUE times)
ts-max (reduce (fn [^long acc ^long in] (if (> in acc) in acc)) 0 times)]
(if-let [stats ?base-stats] ; Merge over previous stats
(let [s-count (+ ^long (get stats :count) ts-count)
s-time (+ ^long (get stats :time) ts-time)
s-mean (/ (double s-time) (double s-count))
s-mad-sum (+ ^long (get stats :mad-sum) ts-mad-sum)
s-mad (/ (double s-mad-sum) (double s-count))
s0-min (get stats :min)
s0-max (get stats :max)]
;; Batched "online" MAD calculation here is >= the standard
;; Knuth/Welford method, Ref. http://goo.gl/QLSfOc,
;; http://goo.gl/mx5eSK.
{:count s-count
:time s-time
:mean s-mean
:mad-sum s-mad-sum
:mad s-mad
:min (if (< ^long s0-min ^long ts-min) s0-min ts-min)
:max (if (> ^long s0-max ^long ts-max) s0-max ts-max)})
{:count ts-count
:time ts-time
:mean ts-mean
:mad-sum ts-mad-sum
:mad (/ (double ts-mad-sum) (double ts-count))
:min ts-min
:max ts-max})))
(defn -compile-final-stats! "Returns {<id> <stats>}"
[clock-time]
(let [^PData pdata (-pdata-proxy)
m-times (.m-times pdata)
m-stats (.m-stats pdata)]
(reduce-kv
(fn [m id times]
(assoc m id (times->stats times (get m-stats id))))
{:clock-time clock-time} m-times)))
(comment
(qb 1e5
(-with-pdata
(-capture-time! :foo 10)
(-capture-time! :foo 20)
(-capture-time! :foo 30)
(-capture-time! :foo 10)
(-compile-final-stats! 0))) ; 121.83
)
;;;;
(defn- perc [n d] (Math/round (/ (double n) (double d) 0.01)))
(comment (perc 14 24))
(defn- ft [nanosecs]
(let [ns (long nanosecs)] ; Truncate any fractionals
(cond
(>= ns 1000000000) (str (enc/round2 (/ ns 1000000000)) "s") ; 1e9
(>= ns 1000000) (str (enc/round2 (/ ns 1000000)) "ms") ; 1e6
(>= ns 1000) (str (enc/round2 (/ ns 1000)) "μs") ; 1e3
:else (str ns "ns"))))
(defn -format-stats
([stats ] (-format-stats stats :time))
([stats sort-field]
(let [clock-time (get stats :clock-time)
stats (dissoc stats :clock-time)
^long accounted (reduce-kv (fn [^long acc k v] (+ acc ^long (:time v))) 0 stats)
^long max-id-width
(reduce-kv
(fn [^long acc k v]
(let [c (count (str k))]
(if (> c acc) c acc)))
#=(count "Accounted Time")
stats)
pattern (str "%" max-id-width "s %,11d %9s %10s %9s %9s %7d %1s%n")
s-pattern (str "%" max-id-width "s %11s %9s %10s %9s %9s %7s %1s%n")
sorted-stat-ids
(sort-by
(fn [id] (get-in stats [id sort-field]))
enc/rcompare
(keys stats))]
(with-out-str
(printf s-pattern "Id" "nCalls" "Min" "Max" "MAD" "Mean" "Time%" "Time")
(enc/run!
(fn [id]
(let [{:keys [count min max mean mad time]} (get stats id)]
(printf pattern id count (ft min) (ft max) (ft mad)
(ft mean) (perc time clock-time) (ft time))))
sorted-stat-ids)
(printf s-pattern "Clock Time" "" "" "" "" "" 100 (ft clock-time))
(printf s-pattern "Accounted Time" "" "" "" "" ""
(perc accounted clock-time) (ft accounted))))))
;;;;
(defmacro pspy (defmacro pspy
"Profile spy. When thread-local profiling is enabled, records "Profile spy. When in the context of a *pdata* binding, records execution time
execution time of named body. Always returns the body's result." of named body. Always returns the body's result."
;; Note: do NOT implement as `(pspy* ~id (fn [] ~@body))`. The fn wrapping
;; can cause unnecessary lazy seq head retention, Ref. http://goo.gl/42Vxph.
[id & body] [id & body]
(let [id (qualified-kw *ns* id)]
(if elide-profiling? (if elide-profiling?
`(do ~@body) `(do ~@body)
`(let [pdata# (-pdata-proxy)] `(if-not *pdata*
(if pdata# (do ~@body)
(let [t0# (System/nanoTime) (let [id# (fq-keyword ~id)
result# (do ~@body) t0# (System/nanoTime)]
t1# (System/nanoTime)] (try (do ~@body)
(-capture-time! pdata# ~id (- t1# t0#)) (finally (capture-time! id# (- (System/nanoTime) t0#))))))))
result#)
(do ~@body))))))
(defmacro p [id & body] `(pspy ~id ~@body)) ; Alias (defmacro p [id & body] `(pspy ~id ~@body)) ; Alias
(comment (macroexpand '(p :foo (+ 4 2)))) (comment (macroexpand '(p :foo (+ 4 2))))
(defmacro profiled (def pspy*
"Experimental, subject to change! (if elide-profiling?
Low-level profiling util. Executes expr with thread-local profiling (fn [id f] (f))
enabled, then executes body with `[<stats> <expr-result>]` binding, e.g: (fn [id f]
(profiled \"foo\" [stats result] (do (println stats) result))" (if-not *pdata*
[expr-to-profile params & body] (f)
(assert (vector? params)) (let [id (fq-keyword id)
(assert (= 2 (count params))) t0 (System/nanoTime)]
(let [[stats result] params] (try (f)
`(try (finally (capture-time! id (- (System/nanoTime) t0)))))))))
(-pdata-proxy (-new-pdata))
(let [t0# (System/nanoTime)
~result ~expr-to-profile
t1# (System/nanoTime)
~stats (-compile-final-stats! (- t1# t0#))]
(do ~@body))
(finally (-pdata-proxy nil)))))
(comment (profiled (p :foo "foo") [stats result] [stats result])) (def p* pspy*) ; Alias
(comment
(binding [*pdata* {}])
(time (dotimes [_ 1000000])) ; ~3ms
(time (dotimes [_ 1000000] (pspy :foo))) ; ~65ms (^:dynamic bound >= once!)
)
(declare ^:private format-stats)
(defmacro with-pdata [level & body]
`(if-not (timbre/log? ~level ~(str *ns*))
{:result (do ~@body)}
(binding [*pdata* (atom {})]
{:result (pspy ::clock-time ~@body)
:stats (merge-times>stats!)})))
(defmacro profile (defmacro profile
"When logging is enabled, executes named body with thread-local profiling "When logging is enabled, executes named body with profiling enabled. Body
enabled and logs profiling stats. Always returns body's result." forms wrapped in (pspy) will be timed and time stats logged. Always returns
[level id & body] body's result.
(let [id (qualified-kw *ns* id)]
(if elide-profiling?
`(do ~@body)
`(if (timbre/may-log? ~level ~(str *ns*)) ; Runtime check
(profiled (do ~@body) [stats# result#]
(let [stats-str# (-format-stats stats#)]
(timbre/log! ~level :p
["Profiling: " ~id "\n" stats-str#]
{:?base-data
{:profile-stats stats#
:profile-stats-str stats-str#}})
result#))
(do ~@body)))))
(comment (profile :info :foo "foo")) Note that logging appenders will receive both a formatted profiling string AND
the raw profiling stats under a special :profiling-stats key (useful for
queryable db logging)."
[level id & body]
`(let [{result# :result stats# :stats} (with-pdata ~level ~@body)]
(when stats#
(timbre/log! ~level :f
["Profiling: %s\n%s" (fq-keyword ~id) (format-stats stats#)]
{:?base-data {:profile-stats stats#}}))
result#))
(defmacro sampling-profile (defmacro sampling-profile
"Like `profile`, but only enables profiling with given probability." "Like `profile`, but only enables profiling with given probability."
[level probability id & body] [level probability id & body]
(assert (<= 0 probability 1) "Probability: 0<=p<=1") `(do (assert (<= 0 ~probability 1) "Probability: 0<=p<=1")
(if elide-profiling? (if-not (< (rand) ~probability) (do ~@body)
`(do ~@body) (profile ~level ~id ~@body))))
`(if (< (rand) ~probability)
(profile ~level ~id ~@body)
(do ~@body))))
;;;; fnp stuff ;;;; Data capturing & aggregation
(defn -fn-sigs [fn-name sigs] (def ^:private stats-gc-n 111111)
(defn capture-time! [id t-elapsed]
(let [ntimes
(->
(swap! *pdata*
(fn [m]
(let [{:as m-id
:keys [times ntimes]
:or {times [] ntimes 0}} (get m id {})]
(assoc m id
(assoc m-id :times (conj times t-elapsed)
:ntimes (inc ntimes))))))
(get-in [id :ntimes]))]
(when (= ntimes stats-gc-n) ; Merge to reduce memory footprint
;; This is so much slower than `capture-time!` swaps that it gets delayed
;; until after entire profiling call completes!:
;; (future (merge-times>stats! id)) ; Uses binding conveyance
(p :timbre/stats-gc (merge-times>stats! id)))
nil))
(comment
(binding [*pdata* (atom {})]
(capture-time! :foo 100000)
(capture-time! :foo 100000)
*pdata*))
(defn merge-times>stats!
([] ; -> {<pid> <merged-stats>}
(reduce (fn [m pid] (assoc m pid (merge-times>stats! pid)))
{} (keys (or @*pdata* {}))))
([id] ; -> <merged-stats>
(->
(swap! *pdata*
(fn [m]
(let [{:as m-id
:keys [times ntimes stats]
:or {times []
ntimes 0
stats {}}} (get m id {})]
(if (empty? times) m
(let [ts-count (max 1 ntimes)
ts-time (reduce + times)
ts-mean (/ ts-time ts-count)
;; Batched "online" MAD calculation here is >= the standard
;; Knuth/Welford method, Ref. http://goo.gl/QLSfOc,
;; http://goo.gl/mx5eSK.
ts-mad-sum (reduce + (map #(Math/abs (long (- % ts-mean)))
times)) ; Mean absolute deviation
;;
s-count (+ (:count stats 0) ts-count)
s-time (+ (:time stats 0) ts-time)
s-mean (/ s-time s-count)
s-mad-sum (+ (:mad-sum stats 0) ts-mad-sum)
s-mad (/ s-mad-sum s-count)
s-min (apply min (:min stats Double/POSITIVE_INFINITY) times)
s-max (apply max (:max stats 0) times)]
(assoc m id
(assoc m-id
:times []
:ntimes 0
:stats {:count s-count
:min s-min
:max s-max
:mean s-mean
:mad-sum s-mad-sum
:mad s-mad
:time s-time})))))))
(get-in [id :stats]))))
(comment
(binding [*pdata* (atom {})]
(capture-time! :foo 10)
(capture-time! :foo 20)
(merge-times>stats! :foo)
(capture-time! :foo 30)
(merge-times>stats! :foo)
(merge-times>stats! :bar)
(capture-time! :foo 10)
*pdata*))
(defn format-stats [stats & [sort-field]]
(let [clock-time (-> stats ::clock-time :time) ; How long entire profile body took
stats (dissoc stats ::clock-time)
accounted (reduce + (map :time (vals stats)))
max-id-width (apply max (map (comp count str)
(conj (keys stats) "Accounted Time")))
pattern (str "%" max-id-width "s %,11d %9s %10s %9s %9s %7d %1s%n")
s-pattern (str "%" max-id-width "s %11s %9s %10s %9s %9s %7s %1s%n")
perc #(Math/round (/ %1 %2 0.01))
ft (fn [nanosecs]
(let [nanosecs (long nanosecs) ; Truncate any fractional nanosecs
pow #(Math/pow 10 %)
ok-pow? #(>= nanosecs (pow %))
to-pow #(enc/round (/ nanosecs (pow %1)) :round %2)]
(cond (ok-pow? 9) (str (to-pow 9 1) "s")
(ok-pow? 6) (str (to-pow 6 0) "ms")
(ok-pow? 3) (str (to-pow 3 0) "μs")
:else (str nanosecs "ns"))))]
(with-out-str
(printf s-pattern "Id" "nCalls" "Min" "Max" "MAD" "Mean" "Time%" "Time")
(doseq [pid (->> (keys stats)
(sort-by #(- (get-in stats [% (or sort-field :time)]))))]
(let [{:keys [count min max mean mad time]} (stats pid)]
(printf pattern pid count (ft min) (ft max) (ft mad)
(ft mean) (perc time clock-time) (ft time))))
(printf s-pattern "Clock Time" "" "" "" "" "" 100 (ft clock-time))
(printf s-pattern "Accounted Time" "" "" "" "" ""
(perc accounted clock-time) (ft accounted)))))
;;;;
(defn fn-sigs "Implementation detail."
[fn-name sigs]
(let [single-arity? (vector? (first sigs)) (let [single-arity? (vector? (first sigs))
sigs (if single-arity? (list sigs) sigs) sigs (if single-arity? (list sigs) sigs)
get-id (if single-arity? get-pid (if single-arity?
(fn [fn-name _params] (name fn-name)) (fn [fn-name _params] (name fn-name))
(fn [fn-name params] (str (name fn-name) \_ (count params)))) (fn [fn-name params] (str (name fn-name) \_ (count params))))
new-sigs new-sigs
@ -275,8 +234,8 @@
(let [has-prepost-map? (and (map? (first others)) (next others)) (let [has-prepost-map? (and (map? (first others)) (next others))
[?prepost-map & body] (if has-prepost-map? others (cons nil others))] [?prepost-map & body] (if has-prepost-map? others (cons nil others))]
(if ?prepost-map (if ?prepost-map
`(~params ~?prepost-map (pspy ~(get-id fn-name params) ~@body)) `(~params ~?prepost-map (pspy ~(get-pid fn-name params) ~@body))
`(~params (pspy ~(get-id fn-name params) ~@body))))) `(~params (pspy ~(get-pid fn-name params) ~@body)))))
sigs)] sigs)]
new-sigs)) new-sigs))
@ -284,17 +243,19 @@
{:arglists '([name? [params*] prepost-map? body] {:arglists '([name? [params*] prepost-map? body]
[name? ([params*] prepost-map? body)+])} [name? ([params*] prepost-map? body)+])}
[& sigs] [& sigs]
(let [[?fn-name sigs] (if (symbol? (first sigs)) [(first sigs) (next sigs)] [nil sigs]) (let [[?fn-name sigs] (if (symbol? (first sigs))
new-sigs (-fn-sigs (or ?fn-name 'anonymous-fn) sigs)] [(first sigs) (next sigs)]
[nil sigs])
new-sigs (fn-sigs (or ?fn-name 'anonymous-fn) sigs)]
(if ?fn-name (if ?fn-name
`(fn ~?fn-name ~@new-sigs) `(fn ~?fn-name ~@new-sigs)
`(fn ~@new-sigs)))) `(fn ~@new-sigs))))
(comment (comment
(-fn-sigs "foo" '([x] (* x x))) (fn-sigs "foo" '([x] (* x x)))
(macroexpand '(fnp [x] (* x x))) (macroexpand '(fnp [x] (* x x)))
(macroexpand '(fn [x] (* x x))) (macroexpand '(fn [x] (* x x)))
(macroexpand '(fnp bob [x] {:pre [x]} (* x x))) (macroexpand '(fnp [x] {:pre [x]} (* x x)))
(macroexpand '(fn [x] {:pre [x]} (* x x)))) (macroexpand '(fn [x] {:pre [x]} (* x x))))
(defmacro defnp "Like `defn` but wraps fn bodies with `p` macro." (defmacro defnp "Like `defn` but wraps fn bodies with `p` macro."
@ -303,24 +264,17 @@
[name doc-string? attr-map? ([params*] prepost-map? body)+ attr-map?])} [name doc-string? attr-map? ([params*] prepost-map? body)+ attr-map?])}
[& sigs] [& sigs]
(let [[fn-name sigs] (enc/name-with-attrs (first sigs) (next sigs)) (let [[fn-name sigs] (enc/name-with-attrs (first sigs) (next sigs))
new-sigs (-fn-sigs fn-name sigs)] new-sigs (fn-sigs fn-name sigs)]
`(defn ~fn-name ~@new-sigs))) `(defn ~fn-name ~@new-sigs)))
(comment (comment
(defnp foo "Docstring" [x] (* x x)) (defnp foo "Docstring "[x] (* x x))
(macroexpand '(defnp foo "Docstring" [x] (* x x))) (macroexpand '(defnp foo "Docstring" [x] (* x x)))
(macroexpand '(defn foo "Docstring" [x] (* x x))) (macroexpand '(defn foo "Docstring" [x] (* x x)))
(macroexpand '(defnp foo "Docstring" ([x] (* x x)) (macroexpand '(defnp foo "Docstring" ([x] (* x x))
([x y] (* x y)))) ([x y] (* x y))))
(profile :info :defnp-test (foo 5))) (profile :info :defnp-test (foo 5)))
;;;; Deprecated
(def pspy* "Deprecated" (fn [_id f] (pspy :pspy*/no-id (f))))
(def p* "Deprecated" pspy*)
(comment (profile :info :pspy* (pspy* :foo (fn [] (Thread/sleep 100)))))
;;;; ;;;;
(comment (comment
@ -334,9 +288,9 @@
(p :10ms (Thread/sleep 10)) (p :10ms (Thread/sleep 10))
"Result")) "Result"))
(p :hello "Hello, this is a result") ; Falls through (no thread context) (p :hello "Hello, this is a result") ; Falls through (no *pdata* context)
(defnp my-fn (defn my-fn
[] []
(let [nums (vec (range 1000))] (let [nums (vec (range 1000))]
(+ (p :fast-sleep (Thread/sleep 1) 10) (+ (p :fast-sleep (Thread/sleep 1) 10)
@ -347,7 +301,12 @@
(p :div (reduce / nums))))) (p :div (reduce / nums)))))
(profile :info :Arithmetic (dotimes [n 100] (my-fn))) (profile :info :Arithmetic (dotimes [n 100] (my-fn)))
(profile :info :high-n (dotimes [n 1e5] (p :nil nil))) ; ~20ms (profile :info :high-n (dotimes [n 1e6] (p :divs (/ 1 2 3 4 5 6 7 8 9))))
(profile :info :high-n (dotimes [n 1e6] (p :nil nil))) ; ~116ms (let [;; MAD = 154.0ms, natural:
(profiled (dotimes [n 1e6] (p :nil nil)) [stats result] [stats result]) ;; n->s {0 10 1 100 2 50 3 500 4 8 5 300 6 32 7 433 8 213 9 48}
(sampling-profile :info 0.5 :sampling-test (p :string "Hello!"))) ;; MAD = 236.0ms, pathological:
n->s {0 10 1 11 2 5 3 18 4 7 5 2 6 300 7 400 8 600 9 700}]
(with-redefs [stats-gc-n 3]
(profile :info :high-sigma (dotimes [n 10] (p :sleep (Thread/sleep (n->s n)))))))
(sampling-profile :info 0.2 :sampling-test (p :string "Hello!")))