Experimental: improved profiling MAD accuracy over stat merges

This commit is contained in:
Peter Taoussanis 2014-03-17 19:59:51 +07:00
parent 54197c1dec
commit 23e9566551
1 changed files with 26 additions and 7 deletions

View File

@ -145,15 +145,33 @@
(if (empty? times) m (if (empty? times) m
(let [ts-count (max 1 ntimes) (let [ts-count (max 1 ntimes)
ts-time (reduce + times) ts-time (reduce + times)
ts-mean (long (/ ts-time ts-count)) ts-mean (/ ts-time ts-count)
ts-mad-sum (long (reduce + (map #(Math/abs (long (- % ts-mean))) ;; Non-streaming Mean Absolute Deviation = (Σ |t - ts-mean|)/n.
times))) ; Mean absolute deviation ;; To get streaming we instead collect deltas separated by
;; sign. This seems to be more accurate than Knuth/Welford,
;; Ref. http://goo.gl/mx5eSK, http://goo.gl/QLSfOc - probably
;; due to larger batch sizes.
[ts-mad-s+ ts-mad-s-]
(reduce (fn [[s+ s-] t]
(let [delta (- t ts-mean)]
(if (>= delta 0) [(+ s+ delta) s-]
[s+ (+ s- delta)])))
[0 0] times)
;; ;;
s-count (+ (:count stats 0) ts-count) s-count (+ (:count stats 0) ts-count)
s-time (+ (:time stats 0) ts-time) s-time (+ (:time stats 0) ts-time)
s-mean (long (/ s-time s-count)) s-mean (/ s-time s-count)
s-mad-sum (long (+ (:mad-sum stats 0) ts-mad-sum)) ;;
s-mad (long (/ s-mad-sum s-count)) s-mad-s+ (if-not (:count stats) ts-mad-s+
(+ (:mad-s+ stats) ts-mad-s+
(* (:count stats) (- s-mean (:mean stats)))))
s-mad-s- (if-not (:count stats) ts-mad-s-
(+ (:mad-s- stats) ts-mad-s-
(* (:count stats) (- s-mean (:mean stats)))))
s-mad-sum (+ (Math/abs (long s-mad-s+))
(Math/abs (long s-mad-s-)))
s-mad (/ s-mad-sum s-count)
;;
s-min (apply min (:min stats Double/POSITIVE_INFINITY) times) s-min (apply min (:min stats Double/POSITIVE_INFINITY) times)
s-max (apply max (:max stats 0) times)] s-max (apply max (:max stats 0) times)]
(assoc m id (assoc m id
@ -164,7 +182,8 @@
:min s-min :min s-min
:max s-max :max s-max
:mean s-mean :mean s-mean
:mad-sum s-mad-sum :mad-s+ s-mad-s+
:mad-s- s-mad-s-
:mad s-mad :mad s-mad
:time s-time}))))))) :time s-time})))))))
(get-in [id :stats])))) (get-in [id :stats]))))