Profiling: final performance work (nb: don't squash)

Choosing not to squash into previous commit since we're introducing
a bunch of JVM-specific code here. If we do later decide to do a
.cljx port, will be handy to have the previous code around as a
platform agnostic option.
This commit is contained in:
Peter Taoussanis 2016-06-23 14:06:35 +07:00
parent d7b22c53c4
commit 7b9d9b31c8
1 changed files with 47 additions and 64 deletions

View File

@ -3,7 +3,8 @@
sampled profiling in production." sampled profiling in production."
{:author "Peter Taoussanis (@ptaoussanis)"} {:author "Peter Taoussanis (@ptaoussanis)"}
(:require [taoensso.encore :as enc :refer (qb)] (:require [taoensso.encore :as enc :refer (qb)]
[taoensso.timbre :as timbre])) [taoensso.timbre :as timbre])
(:import [java.util HashMap LinkedList]))
;;;; TODO ;;;; TODO
;; * Support for explicit `config` args? ;; * Support for explicit `config` args?
@ -24,16 +25,9 @@
;;;; ;;;;
;; We accumulate times in one of these.
;; Counted cons perf > list > (transient []).
(deftype Times [cons count])
;; We establish one of these (thread local) to enable profiling. ;; We establish one of these (thread local) to enable profiling.
(deftype PData [m-times m-stats]) ; [{<id> <Times>} ?{<id> <?interim-stats>}] (deftype PData [m-times m-stats]) ; [?{<id> <LinkedList>} ?{<id> <interim-stats>}]
(defmacro -new-pdata [] `(PData. nil nil))
(defmacro -new-pdata [] `(PData. (transient {}) nil))
;; (defmacro -new-pdata_ [] `(enc/-vol! (-new-pdata)))
;; (def ^:dynamic *pdata_* nil)
;; This is substantially faster than a ^:dynamic volatile: ;; This is substantially faster than a ^:dynamic volatile:
(def -pdata-proxy (def -pdata-proxy
@ -49,60 +43,50 @@
(-capture-time! (-pdata-proxy) id t-elapsed)) (-capture-time! (-pdata-proxy) id t-elapsed))
([^PData pdata id t-elapsed] ; Common case ([^PData pdata id t-elapsed] ; Common case
(let [;; pdata_ *pdata_* (let [m-times (.m-times pdata)
;; ^PData pdata @pdata_
;; ^PData pdata (-pdata-proxy)
m-times (.m-times pdata)
m-stats (.m-stats pdata)] m-stats (.m-stats pdata)]
(if-let [^Times times (get m-times id)] (if-let [^LinkedList times (get m-times id)]
(let [^long ntimes (.count times)] (if (== (.size times) #_20 2000000) ; Rare in real-world use
(if (== ntimes 2000000 #_20) ; Rare in real-world use ;; Compact: merge interim stats to help prevent OOMs
;; Compact: merge interim stats to help prevent OOMs (let [stats (times->stats times (get m-stats id))
;; (print (str "\nCompacting: " (times->stats times (get m-stats id)) "\n")) times (LinkedList.)]
;; (enc/-vol-reset! pdata_) (.add times t-elapsed)
(-pdata-proxy (-pdata-proxy
(PData. (PData. (assoc m-times id times)
(assoc! m-times id (Times. (cons t-elapsed nil) 1)) (assoc m-stats id stats))))
(assoc m-stats id (times->stats times (get m-stats id)))))
;; Common case ;; Common case
;; (enc/-vol-reset! pdata_) (.add times t-elapsed))
(-pdata-proxy
(PData.
(assoc! m-times id (Times. (cons t-elapsed (.cons times))
(inc ntimes)))
m-stats))))
;; Init case ;; Init case
;; (enc/-vol-reset! pdata_) (let [times (LinkedList.)]
(-pdata-proxy (.add times t-elapsed)
(PData. (-pdata-proxy (PData. (assoc m-times id times)
(assoc! m-times id (Times. (cons t-elapsed nil) 1)) m-stats))))
m-stats)))
nil))) nil)))
;; (defmacro -with-pdata [& body] `(binding [*pdata_* (new-pdata_)] (do ~@body))) ;; Just for dev/debugging
(defmacro -with-pdata [& body] ; Just for dev/debugging (defmacro -with-pdata [& body]
`(try `(try
(-pdata-proxy (-new-pdata)) (-pdata-proxy (-new-pdata))
(do ~@body) (do ~@body)
(finally (-pdata-proxy nil)))) (finally (-pdata-proxy nil))))
(comment (comment
(-with-pdata (qb 1e6 (-capture-time! :foo 1000))) ; 182.88 (-with-pdata (qb 1e6 (-capture-time! :foo 1000))) ; 70.84
(-with-pdata (-with-pdata
(dotimes [_ 20] (-capture-time! :foo 100000)) (dotimes [_ 20] (-capture-time! :foo 100000))
(.cons ^Times (:foo (persistent! (.m-times ^PData (-pdata-proxy))))))) (.m-times ^PData (-pdata-proxy))))
(defn- times->stats [^Times times ?base-stats] (defn- times->stats [^LinkedList times ?base-stats]
(let [^long ntimes (.count times) (let [ntimes (.size times)
times (.cons times) times (into [] times) ; Faster to reduce
ts-count (if (zero? ntimes) 1 ntimes) ts-count (if (zero? ntimes) 1 ntimes)
ts-time (reduce (fn [^long acc ^long in] (+ acc in)) times) ts-time (reduce (fn [^long acc ^long in] (+ acc in)) times)
ts-mean (/ (double ts-time) (double ts-count)) ts-mean (/ (double ts-time) (double ts-count))
ts-mad-sum (reduce (fn [^long acc ^long in] (+ acc (Math/abs (- in ts-mean)))) 0 times) ts-mad-sum (reduce (fn [^long acc ^long in] (+ acc (Math/abs (- in ts-mean)))) 0 times)
ts-min (reduce (fn [^long acc ^long in] (if (< in acc) in acc)) Long/MAX_VALUE times) ts-min (reduce (fn [^long acc ^long in] (if (< in acc) in acc)) Long/MAX_VALUE times)
ts-max (reduce (fn [^long acc ^long in] (if (> in acc) in acc)) 0 times)] ts-max (reduce (fn [^long acc ^long in] (if (> in acc) in acc)) 0 times)]
@ -136,23 +120,24 @@
:max ts-max}))) :max ts-max})))
(defn -compile-final-stats! "Returns {<id> <stats>}" (defn -compile-final-stats! "Returns {<id> <stats>}"
[^PData pdata clock-time] [clock-time]
(let [;; PData should be discarded; cannot be reused after `persistent!`: (let [^PData pdata (-pdata-proxy)
m-times (persistent! (.m-times pdata)) m-times (.m-times pdata)
m-stats (.m-stats pdata)] m-stats (.m-stats pdata)]
(reduce-kv (reduce-kv
(fn [m id times] (fn [m id times]
(assoc m id (times->stats times (get m-stats id)))) (assoc m id (times->stats times (get m-stats id))))
{:clock-time clock-time} m-times))) {:clock-time clock-time} m-times)))
(comment (comment
(-with-pdata (qb 1e5
(-capture-time! :foo 10) (-with-pdata
(-capture-time! :foo 20) (-capture-time! :foo 10)
(-capture-time! :foo 30) (-capture-time! :foo 20)
(-capture-time! :foo 10) (-capture-time! :foo 30)
(-compile-final-stats! (-pdata-proxy) 0))) (-capture-time! :foo 10)
(-compile-final-stats! 0))) ; 121.83
)
;;;; ;;;;
@ -227,19 +212,17 @@
(defmacro profile (defmacro profile
"When logging is enabled, executes named body with thread-local profiling "When logging is enabled, executes named body with thread-local profiling
enabled. Body forms wrapped by `pspy` will be timed and time stats logged. enabled and logs profiling stats. Always returns body's result."
Always returns body's result."
[level id & body] [level id & body]
(let [id (qualified-kw *ns* id)] (let [id (qualified-kw *ns* id)]
(if elide-profiling? (if elide-profiling?
`(do ~@body) `(do ~@body)
`(if (timbre/log? ~level ~(str *ns*)) ; Runtime check `(if (timbre/log? ~level ~(str *ns*)) ; Runtime check
(try (try
(let [pdata# (-pdata-proxy (-new-pdata)) (-pdata-proxy (-new-pdata))
t0# (System/nanoTime) (let [t0# (System/nanoTime)
result# (do ~@body) result# (do ~@body)
stats# (-compile-final-stats! pdata# stats# (-compile-final-stats! (- (System/nanoTime) t0#))
(- (System/nanoTime) t0#))
stats-str# (-format-stats stats#)] stats-str# (-format-stats stats#)]
(timbre/log! ~level :p (timbre/log! ~level :p
["Profiling: " ~id "\n" stats-str#] ["Profiling: " ~id "\n" stats-str#]
@ -346,6 +329,6 @@
(p :div (reduce / nums))))) (p :div (reduce / nums)))))
(profile :info :Arithmetic (dotimes [n 100] (my-fn))) (profile :info :Arithmetic (dotimes [n 100] (my-fn)))
(profile :info :high-n (dotimes [n 1e5] (p :nil nil))) ; 31ms (profile :info :high-n (dotimes [n 1e5] (p :nil nil))) ; ~19ms
(profile :info :high-n (dotimes [n 1e6] (p :nil nil))) ; ~232ms (profile :info :high-n (dotimes [n 1e6] (p :nil nil))) ; ~116ms
(sampling-profile :info 0.5 :sampling-test (p :string "Hello!"))) (sampling-profile :info 0.5 :sampling-test (p :string "Hello!")))