diff --git a/src/taoensso/timbre/profiling.clj b/src/taoensso/timbre/profiling.clj index 4085e63..eaafd15 100644 --- a/src/taoensso/timbre/profiling.clj +++ b/src/taoensso/timbre/profiling.clj @@ -1,272 +1,231 @@ (ns taoensso.timbre.profiling - "Simple logging profiler for Timbre. Highly optimized; supports - sampled profiling in production." - {:author "Peter Taoussanis (@ptaoussanis)"} - (:require [taoensso.encore :as enc :refer (qb)] - [taoensso.timbre :as timbre]) - (:import [java.util HashMap LinkedList])) + "Logging profiler for Timbre, adapted from clojure.contrib.profile." + {:author "Peter Taoussanis"} + (:require [taoensso.encore :as enc] + [taoensso.timbre :as timbre])) -;;;; TODO +;;;; TODO ns could use some housekeeping +;; * Boxed math optimizations +;; * Possible porting to .cljx (any point?) ;; * Support for explicit `config` args? -;; * Consider a .cljx port? Any demand for this kind of cljs profiling? -;; * Support for real level+ns based elision (zero *pdata* check cost, etc.)? -;; - E.g. perhaps `p` forms could take a logging level? +;; * General housekeeping, perf work ;;;; Utils -;; Note that we only support *compile-time* ids -(defn- qualified-kw [ns id] (if (enc/qualified-keyword? id) id (keyword (str ns) (name id)))) -(comment (qualified-kw *ns* "foo")) +(defmacro fq-keyword "Returns namespaced keyword for given id." + [id] `(if (and (keyword? ~id) (namespace ~id)) ~id + (keyword ~(str *ns*) (name ~id)))) -(def ^:private elide-profiling? - "Completely elide all profiling? In particular, eliminates proxy checks. - TODO Temp, until we have a better elision strategy." - (enc/read-sys-val "TIMBRE_ELIDE_PROFILING")) +(comment (map #(fq-keyword %) ["foo" :foo :foo/bar])) + +;; TODO May be preferable if our `p` forms could actually take a logging level? +;; Need to think about this. Might just be time to refactor this entire ns + design +(def ^:private elide-profiling? "Experimental" + (when-let [s (System/getenv "TIMBRE_ELIDE_PROFILING")] (enc/read-edn s))) ;;;; -;; We establish one of these (thread local) to enable profiling. -(deftype PData [m-times m-stats]) ; [?{ } ?{ }] -(defmacro -new-pdata [] `(PData. nil nil)) +(def ^:dynamic *pdata* + "{::pid {:times [t1 t2 ...] ; Times awaiting merge into stats + :ntimes _ ; (count times) + :stats {} ; Cumulative stats + }}" + nil) -;; This is substantially faster than a ^:dynamic volatile: -(def -pdata-proxy - (let [^ThreadLocal proxy (proxy [ThreadLocal] [])] - (fn - ([] (.get proxy)) ; nnil iff profiling enabled - ([new-val] (.set proxy new-val) new-val)))) - -(declare ^:private times->stats) -(defn -capture-time! - - ([id t-elapsed] ; Just for dev/debugging - (-capture-time! (-pdata-proxy) id t-elapsed)) - - ([^PData pdata id t-elapsed] ; Common case - (let [m-times (.m-times pdata) - m-stats (.m-stats pdata)] - - (if-let [^LinkedList times (get m-times id)] - (if (== (.size times) #_20 2000000) ; Rare in real-world use - ;; Compact: merge interim stats to help prevent OOMs - (let [stats (times->stats times (get m-stats id)) - times (LinkedList.)] - (.add times t-elapsed) - (-pdata-proxy - (PData. (assoc m-times id times) - (assoc m-stats id stats)))) - - ;; Common case - (.add times t-elapsed)) - - ;; Init case - (let [times (LinkedList.)] - (.add times t-elapsed) - (-pdata-proxy (PData. (assoc m-times id times) - m-stats)))) - - nil))) - -;; Just for dev/debugging -(defmacro -with-pdata [& body] - `(try - (-pdata-proxy (-new-pdata)) - (do ~@body) - (finally (-pdata-proxy nil)))) - -(comment - (-with-pdata (qb 1e6 (-capture-time! :foo 1000))) ; 70.84 - (-with-pdata - (dotimes [_ 20] (-capture-time! :foo 100000)) - (.m-times ^PData (-pdata-proxy)))) - -(defn- times->stats [^LinkedList times ?base-stats] - (let [ntimes (.size times) - times (into [] times) ; Faster to reduce - ts-count (if (zero? ntimes) 1 ntimes) - ts-time (reduce (fn [^long acc ^long in] (+ acc in)) times) - ts-mean (/ (double ts-time) (double ts-count)) - ts-mad-sum (reduce (fn [^long acc ^long in] (+ acc (Math/abs (- in ts-mean)))) 0 times) - ts-min (reduce (fn [^long acc ^long in] (if (< in acc) in acc)) Long/MAX_VALUE times) - ts-max (reduce (fn [^long acc ^long in] (if (> in acc) in acc)) 0 times)] - - (if-let [stats ?base-stats] ; Merge over previous stats - (let [s-count (+ ^long (get stats :count) ts-count) - s-time (+ ^long (get stats :time) ts-time) - s-mean (/ (double s-time) (double s-count)) - s-mad-sum (+ ^long (get stats :mad-sum) ts-mad-sum) - s-mad (/ (double s-mad-sum) (double s-count)) - s0-min (get stats :min) - s0-max (get stats :max)] - - ;; Batched "online" MAD calculation here is >= the standard - ;; Knuth/Welford method, Ref. http://goo.gl/QLSfOc, - ;; http://goo.gl/mx5eSK. - - {:count s-count - :time s-time - :mean s-mean - :mad-sum s-mad-sum - :mad s-mad - :min (if (< ^long s0-min ^long ts-min) s0-min ts-min) - :max (if (> ^long s0-max ^long ts-max) s0-max ts-max)}) - - {:count ts-count - :time ts-time - :mean ts-mean - :mad-sum ts-mad-sum - :mad (/ (double ts-mad-sum) (double ts-count)) - :min ts-min - :max ts-max}))) - -(defn -compile-final-stats! "Returns { }" - [clock-time] - (let [^PData pdata (-pdata-proxy) - m-times (.m-times pdata) - m-stats (.m-stats pdata)] - (reduce-kv - (fn [m id times] - (assoc m id (times->stats times (get m-stats id)))) - {:clock-time clock-time} m-times))) - -(comment - (qb 1e5 - (-with-pdata - (-capture-time! :foo 10) - (-capture-time! :foo 20) - (-capture-time! :foo 30) - (-capture-time! :foo 10) - (-compile-final-stats! 0))) ; 121.83 - ) - -;;;; - -(defn- perc [n d] (Math/round (/ (double n) (double d) 0.01))) -(comment (perc 14 24)) - -(defn- ft [nanosecs] - (let [ns (long nanosecs)] ; Truncate any fractionals - (cond - (>= ns 1000000000) (str (enc/round2 (/ ns 1000000000)) "s") ; 1e9 - (>= ns 1000000) (str (enc/round2 (/ ns 1000000)) "ms") ; 1e6 - (>= ns 1000) (str (enc/round2 (/ ns 1000)) "μs") ; 1e3 - :else (str ns "ns")))) - -(defn -format-stats - ([stats ] (-format-stats stats :time)) - ([stats sort-field] - (let [clock-time (get stats :clock-time) - stats (dissoc stats :clock-time) - ^long accounted (reduce-kv (fn [^long acc k v] (+ acc ^long (:time v))) 0 stats) - - ^long max-id-width - (reduce-kv - (fn [^long acc k v] - (let [c (count (str k))] - (if (> c acc) c acc))) - #=(count "Accounted Time") - stats) - - pattern (str "%" max-id-width "s %,11d %9s %10s %9s %9s %7d %1s%n") - s-pattern (str "%" max-id-width "s %11s %9s %10s %9s %9s %7s %1s%n") - - sorted-stat-ids - (sort-by - (fn [id] (get-in stats [id sort-field])) - enc/rcompare - (keys stats))] - - (with-out-str - (printf s-pattern "Id" "nCalls" "Min" "Max" "MAD" "Mean" "Time%" "Time") - (enc/run! - (fn [id] - (let [{:keys [count min max mean mad time]} (get stats id)] - (printf pattern id count (ft min) (ft max) (ft mad) - (ft mean) (perc time clock-time) (ft time)))) - sorted-stat-ids) - - (printf s-pattern "Clock Time" "" "" "" "" "" 100 (ft clock-time)) - (printf s-pattern "Accounted Time" "" "" "" "" "" - (perc accounted clock-time) (ft accounted)))))) - -;;;; +(declare capture-time! merge-times>stats!) (defmacro pspy - "Profile spy. When thread-local profiling is enabled, records - execution time of named body. Always returns the body's result." + "Profile spy. When in the context of a *pdata* binding, records execution time + of named body. Always returns the body's result." + ;; Note: do NOT implement as `(pspy* ~id (fn [] ~@body))`. The fn wrapping + ;; can cause unnecessary lazy seq head retention, Ref. http://goo.gl/42Vxph. [id & body] - (let [id (qualified-kw *ns* id)] - (if elide-profiling? - `(do ~@body) - `(let [pdata# (-pdata-proxy)] - (if pdata# - (let [t0# (System/nanoTime) - result# (do ~@body) - t1# (System/nanoTime)] - (-capture-time! pdata# ~id (- t1# t0#)) - result#) - (do ~@body)))))) + (if elide-profiling? + `(do ~@body) + `(if-not *pdata* + (do ~@body) + (let [id# (fq-keyword ~id) + t0# (System/nanoTime)] + (try (do ~@body) + (finally (capture-time! id# (- (System/nanoTime) t0#)))))))) (defmacro p [id & body] `(pspy ~id ~@body)) ; Alias (comment (macroexpand '(p :foo (+ 4 2)))) -(defmacro profiled - "Experimental, subject to change! - Low-level profiling util. Executes expr with thread-local profiling - enabled, then executes body with `[ ]` binding, e.g: - (profiled \"foo\" [stats result] (do (println stats) result))" - [expr-to-profile params & body] - (assert (vector? params)) - (assert (= 2 (count params))) - (let [[stats result] params] - `(try - (-pdata-proxy (-new-pdata)) - (let [t0# (System/nanoTime) - ~result ~expr-to-profile - t1# (System/nanoTime) - ~stats (-compile-final-stats! (- t1# t0#))] - (do ~@body)) - (finally (-pdata-proxy nil))))) +(def pspy* + (if elide-profiling? + (fn [id f] (f)) + (fn [id f] + (if-not *pdata* + (f) + (let [id (fq-keyword id) + t0 (System/nanoTime)] + (try (f) + (finally (capture-time! id (- (System/nanoTime) t0))))))))) -(comment (profiled (p :foo "foo") [stats result] [stats result])) +(def p* pspy*) ; Alias + +(comment + (binding [*pdata* {}]) + (time (dotimes [_ 1000000])) ; ~3ms + (time (dotimes [_ 1000000] (pspy :foo))) ; ~65ms (^:dynamic bound >= once!) + ) + +(declare ^:private format-stats) + +(defmacro with-pdata [level & body] + `(if-not (timbre/log? ~level ~(str *ns*)) + {:result (do ~@body)} + (binding [*pdata* (atom {})] + {:result (pspy ::clock-time ~@body) + :stats (merge-times>stats!)}))) (defmacro profile - "When logging is enabled, executes named body with thread-local profiling - enabled and logs profiling stats. Always returns body's result." - [level id & body] - (let [id (qualified-kw *ns* id)] - (if elide-profiling? - `(do ~@body) - `(if (timbre/may-log? ~level ~(str *ns*)) ; Runtime check - (profiled (do ~@body) [stats# result#] - (let [stats-str# (-format-stats stats#)] - (timbre/log! ~level :p - ["Profiling: " ~id "\n" stats-str#] - {:?base-data - {:profile-stats stats# - :profile-stats-str stats-str#}}) - result#)) - (do ~@body))))) + "When logging is enabled, executes named body with profiling enabled. Body + forms wrapped in (pspy) will be timed and time stats logged. Always returns + body's result. -(comment (profile :info :foo "foo")) + Note that logging appenders will receive both a formatted profiling string AND + the raw profiling stats under a special :profiling-stats key (useful for + queryable db logging)." + [level id & body] + `(let [{result# :result stats# :stats} (with-pdata ~level ~@body)] + (when stats# + (timbre/log! ~level :f + ["Profiling: %s\n%s" (fq-keyword ~id) (format-stats stats#)] + {:?base-data {:profile-stats stats#}})) + result#)) (defmacro sampling-profile "Like `profile`, but only enables profiling with given probability." [level probability id & body] - (assert (<= 0 probability 1) "Probability: 0<=p<=1") - (if elide-profiling? - `(do ~@body) - `(if (< (rand) ~probability) - (profile ~level ~id ~@body) - (do ~@body)))) + `(do (assert (<= 0 ~probability 1) "Probability: 0<=p<=1") + (if-not (< (rand) ~probability) (do ~@body) + (profile ~level ~id ~@body)))) -;;;; fnp stuff +;;;; Data capturing & aggregation -(defn -fn-sigs [fn-name sigs] +(def ^:private stats-gc-n 111111) + +(defn capture-time! [id t-elapsed] + (let [ntimes + (-> + (swap! *pdata* + (fn [m] + (let [{:as m-id + :keys [times ntimes] + :or {times [] ntimes 0}} (get m id {})] + (assoc m id + (assoc m-id :times (conj times t-elapsed) + :ntimes (inc ntimes)))))) + (get-in [id :ntimes]))] + (when (= ntimes stats-gc-n) ; Merge to reduce memory footprint + ;; This is so much slower than `capture-time!` swaps that it gets delayed + ;; until after entire profiling call completes!: + ;; (future (merge-times>stats! id)) ; Uses binding conveyance + (p :timbre/stats-gc (merge-times>stats! id))) + nil)) + +(comment + (binding [*pdata* (atom {})] + (capture-time! :foo 100000) + (capture-time! :foo 100000) + *pdata*)) + +(defn merge-times>stats! + ([] ; -> { } + (reduce (fn [m pid] (assoc m pid (merge-times>stats! pid))) + {} (keys (or @*pdata* {})))) + + ([id] ; -> + (-> + (swap! *pdata* + (fn [m] + (let [{:as m-id + :keys [times ntimes stats] + :or {times [] + ntimes 0 + stats {}}} (get m id {})] + (if (empty? times) m + (let [ts-count (max 1 ntimes) + ts-time (reduce + times) + ts-mean (/ ts-time ts-count) + ;; Batched "online" MAD calculation here is >= the standard + ;; Knuth/Welford method, Ref. http://goo.gl/QLSfOc, + ;; http://goo.gl/mx5eSK. + ts-mad-sum (reduce + (map #(Math/abs (long (- % ts-mean))) + times)) ; Mean absolute deviation + ;; + s-count (+ (:count stats 0) ts-count) + s-time (+ (:time stats 0) ts-time) + s-mean (/ s-time s-count) + s-mad-sum (+ (:mad-sum stats 0) ts-mad-sum) + s-mad (/ s-mad-sum s-count) + s-min (apply min (:min stats Double/POSITIVE_INFINITY) times) + s-max (apply max (:max stats 0) times)] + (assoc m id + (assoc m-id + :times [] + :ntimes 0 + :stats {:count s-count + :min s-min + :max s-max + :mean s-mean + :mad-sum s-mad-sum + :mad s-mad + :time s-time}))))))) + (get-in [id :stats])))) + +(comment + (binding [*pdata* (atom {})] + (capture-time! :foo 10) + (capture-time! :foo 20) + (merge-times>stats! :foo) + (capture-time! :foo 30) + (merge-times>stats! :foo) + (merge-times>stats! :bar) + (capture-time! :foo 10) + *pdata*)) + +(defn format-stats [stats & [sort-field]] + (let [clock-time (-> stats ::clock-time :time) ; How long entire profile body took + stats (dissoc stats ::clock-time) + accounted (reduce + (map :time (vals stats))) + max-id-width (apply max (map (comp count str) + (conj (keys stats) "Accounted Time"))) + pattern (str "%" max-id-width "s %,11d %9s %10s %9s %9s %7d %1s%n") + s-pattern (str "%" max-id-width "s %11s %9s %10s %9s %9s %7s %1s%n") + perc #(Math/round (/ %1 %2 0.01)) + ft (fn [nanosecs] + (let [nanosecs (long nanosecs) ; Truncate any fractional nanosecs + pow #(Math/pow 10 %) + ok-pow? #(>= nanosecs (pow %)) + to-pow #(enc/round (/ nanosecs (pow %1)) :round %2)] + (cond (ok-pow? 9) (str (to-pow 9 1) "s") + (ok-pow? 6) (str (to-pow 6 0) "ms") + (ok-pow? 3) (str (to-pow 3 0) "μs") + :else (str nanosecs "ns"))))] + + (with-out-str + (printf s-pattern "Id" "nCalls" "Min" "Max" "MAD" "Mean" "Time%" "Time") + (doseq [pid (->> (keys stats) + (sort-by #(- (get-in stats [% (or sort-field :time)]))))] + (let [{:keys [count min max mean mad time]} (stats pid)] + (printf pattern pid count (ft min) (ft max) (ft mad) + (ft mean) (perc time clock-time) (ft time)))) + + (printf s-pattern "Clock Time" "" "" "" "" "" 100 (ft clock-time)) + (printf s-pattern "Accounted Time" "" "" "" "" "" + (perc accounted clock-time) (ft accounted))))) + +;;;; + +(defn fn-sigs "Implementation detail." + [fn-name sigs] (let [single-arity? (vector? (first sigs)) sigs (if single-arity? (list sigs) sigs) - get-id (if single-arity? + get-pid (if single-arity? (fn [fn-name _params] (name fn-name)) (fn [fn-name params] (str (name fn-name) \_ (count params)))) new-sigs @@ -275,8 +234,8 @@ (let [has-prepost-map? (and (map? (first others)) (next others)) [?prepost-map & body] (if has-prepost-map? others (cons nil others))] (if ?prepost-map - `(~params ~?prepost-map (pspy ~(get-id fn-name params) ~@body)) - `(~params (pspy ~(get-id fn-name params) ~@body))))) + `(~params ~?prepost-map (pspy ~(get-pid fn-name params) ~@body)) + `(~params (pspy ~(get-pid fn-name params) ~@body))))) sigs)] new-sigs)) @@ -284,18 +243,20 @@ {:arglists '([name? [params*] prepost-map? body] [name? ([params*] prepost-map? body)+])} [& sigs] - (let [[?fn-name sigs] (if (symbol? (first sigs)) [(first sigs) (next sigs)] [nil sigs]) - new-sigs (-fn-sigs (or ?fn-name 'anonymous-fn) sigs)] + (let [[?fn-name sigs] (if (symbol? (first sigs)) + [(first sigs) (next sigs)] + [nil sigs]) + new-sigs (fn-sigs (or ?fn-name 'anonymous-fn) sigs)] (if ?fn-name `(fn ~?fn-name ~@new-sigs) `(fn ~@new-sigs)))) (comment - (-fn-sigs "foo" '([x] (* x x))) - (macroexpand '(fnp [x] (* x x))) - (macroexpand '(fn [x] (* x x))) - (macroexpand '(fnp bob [x] {:pre [x]} (* x x))) - (macroexpand '(fn [x] {:pre [x]} (* x x)))) + (fn-sigs "foo" '([x] (* x x))) + (macroexpand '(fnp [x] (* x x))) + (macroexpand '(fn [x] (* x x))) + (macroexpand '(fnp [x] {:pre [x]} (* x x))) + (macroexpand '(fn [x] {:pre [x]} (* x x)))) (defmacro defnp "Like `defn` but wraps fn bodies with `p` macro." {:arglists @@ -303,24 +264,17 @@ [name doc-string? attr-map? ([params*] prepost-map? body)+ attr-map?])} [& sigs] (let [[fn-name sigs] (enc/name-with-attrs (first sigs) (next sigs)) - new-sigs (-fn-sigs fn-name sigs)] + new-sigs (fn-sigs fn-name sigs)] `(defn ~fn-name ~@new-sigs))) (comment - (defnp foo "Docstring" [x] (* x x)) - (macroexpand '(defnp foo "Docstring" [x] (* x x))) - (macroexpand '(defn foo "Docstring" [x] (* x x))) + (defnp foo "Docstring "[x] (* x x)) + (macroexpand '(defnp foo "Docstring" [x] (* x x))) + (macroexpand '(defn foo "Docstring" [x] (* x x))) (macroexpand '(defnp foo "Docstring" ([x] (* x x)) ([x y] (* x y)))) (profile :info :defnp-test (foo 5))) -;;;; Deprecated - -(def pspy* "Deprecated" (fn [_id f] (pspy :pspy*/no-id (f)))) -(def p* "Deprecated" pspy*) - -(comment (profile :info :pspy* (pspy* :foo (fn [] (Thread/sleep 100))))) - ;;;; (comment @@ -334,9 +288,9 @@ (p :10ms (Thread/sleep 10)) "Result")) - (p :hello "Hello, this is a result") ; Falls through (no thread context) + (p :hello "Hello, this is a result") ; Falls through (no *pdata* context) - (defnp my-fn + (defn my-fn [] (let [nums (vec (range 1000))] (+ (p :fast-sleep (Thread/sleep 1) 10) @@ -347,7 +301,12 @@ (p :div (reduce / nums))))) (profile :info :Arithmetic (dotimes [n 100] (my-fn))) - (profile :info :high-n (dotimes [n 1e5] (p :nil nil))) ; ~20ms - (profile :info :high-n (dotimes [n 1e6] (p :nil nil))) ; ~116ms - (profiled (dotimes [n 1e6] (p :nil nil)) [stats result] [stats result]) - (sampling-profile :info 0.5 :sampling-test (p :string "Hello!"))) + (profile :info :high-n (dotimes [n 1e6] (p :divs (/ 1 2 3 4 5 6 7 8 9)))) + (let [;; MAD = 154.0ms, natural: + ;; n->s {0 10 1 100 2 50 3 500 4 8 5 300 6 32 7 433 8 213 9 48} + ;; MAD = 236.0ms, pathological: + n->s {0 10 1 11 2 5 3 18 4 7 5 2 6 300 7 400 8 600 9 700}] + (with-redefs [stats-gc-n 3] + (profile :info :high-sigma (dotimes [n 10] (p :sleep (Thread/sleep (n->s n))))))) + + (sampling-profile :info 0.2 :sampling-test (p :string "Hello!")))