[Fixes: #11520] Rotate mailservers on error conditions

If a mailserver request is failing we disconnect at the 3rd attempt.

Though once we are disconnected, we connect again to the same mailserver
based on ping time.

This commit changes the behavior so that mailservers are penalized if
they are returning errors.

Signed-off-by: Andrea Maria Piana <andrea.maria.piana@gmail.com>
This commit is contained in:
Andrea Maria Piana 2020-12-08 16:31:31 +01:00
parent 5259de7423
commit 1b4dcb7103
No known key found for this signature in database
GPG Key ID: AA6CCA6DE0E06424
3 changed files with 60 additions and 9 deletions

View File

@ -11,6 +11,9 @@
(def max-limit 1000) (def max-limit 1000)
(def backoff-interval-ms 3000) (def backoff-interval-ms 3000)
(def default-limit max-limit) (def default-limit max-limit)
;; If a mailserver fails, how long before we should consider them again
;; for selection, in ms
(def cooloff-period 120000)
(def connection-timeout (def connection-timeout
"Time after which mailserver connection is considered to have failed" "Time after which mailserver connection is considered to have failed"
10000) 10000)

View File

@ -100,10 +100,11 @@
:method "admin_removePeer" :method "admin_removePeer"
:params [enode]} :params [enode]}
payload (.stringify js/JSON (clj->js args))] payload (.stringify js/JSON (clj->js args))]
(when enode
(status/call-private-rpc payload (status/call-private-rpc payload
(handlers/response-handler (handlers/response-handler
#(log/info "mailserver: remove-peer success" %) #(log/info "mailserver: remove-peer success" %)
#(log/error "mailserver: remove-peer error" %))))) #(log/error "mailserver: remove-peer error" %))))))
(re-frame/reg-fx (re-frame/reg-fx
:mailserver/add-peer :mailserver/add-peer
@ -272,6 +273,31 @@
:on-success :on-success
#(re-frame/dispatch [::get-latency-callback %])}]})) #(re-frame/dispatch [::get-latency-callback %])}]}))
(fx/defn log-mailserver-failure [{:keys [db now]}]
(when-let [mailserver (fetch-current db)]
{:db (assoc-in db [:mailserver/failures (:address mailserver)] now)}))
(defn sort-mailservers
"Sort mailservers sorts the mailservers by recent failures, and by rtt
for breaking ties"
[{:keys [now db]} mailservers]
(let [mailserver-failures (:mailserver/failures db)
sort-fn (fn [a b]
(let [failures-a (get mailserver-failures (:address a))
failures-b (get mailserver-failures (:address b))
has-a-failed? (boolean
(and failures-a (<= (- now failures-a) constants/cooloff-period)))
has-b-failed? (boolean
(and failures-b (<= (- now failures-b) constants/cooloff-period)))]
;; If both have failed, or none of them, then compare rtt
(cond
(= has-a-failed? has-b-failed?)
(compare (:rttMs a) (:rttMs b))
;; Otherwise prefer the one that has not failed recently
has-a-failed? 1
has-b-failed? -1)))]
(sort sort-fn mailservers)))
(fx/defn set-current-mailserver-with-lowest-latency (fx/defn set-current-mailserver-with-lowest-latency
"Picks a random mailserver amongs the ones with the lowest latency "Picks a random mailserver amongs the ones with the lowest latency
The results with error are ignored The results with error are ignored
@ -281,7 +307,7 @@
(let [successful-pings (remove :error latency-results)] (let [successful-pings (remove :error latency-results)]
(when (seq successful-pings) (when (seq successful-pings)
(let [address (-> (take (pool-size (count successful-pings)) (let [address (-> (take (pool-size (count successful-pings))
(sort-by :rttMs successful-pings)) (sort-mailservers cofx successful-pings))
rand-nth rand-nth
:address) :address)
mailserver-id (mailserver-address->id db address)] mailserver-id (mailserver-address->id db address)]
@ -575,10 +601,14 @@
(when (contains? db :multiaccount) (when (contains? db :multiaccount)
(let [last-connection-attempt (:mailserver/last-connection-attempt db)] (let [last-connection-attempt (:mailserver/last-connection-attempt db)]
(when (and (fetch-use-mailservers? cofx) (when (and (fetch-use-mailservers? cofx)
(<= (- now last-connection-attempt))) ;; We are not connected
(fx/merge cofx (not= :connected (:mailserver/state db))
(when (not= :connected (:mailserver/state db)) ;; We either never tried to connect to this peer
(change-mailserver))))))) (or (nil? last-connection-attempt)
;; Or 30 seconds have passed and no luck
(<= (- now last-connection-attempt) (* constants/connection-timeout 3))))
;; Then we change mailserver
(change-mailserver cofx)))))
(fx/defn reset-request-to (fx/defn reset-request-to
[{:keys [db]}] [{:keys [db]}]
@ -974,6 +1004,7 @@
(:attempts current-request)) (:attempts current-request))
(fx/merge cofx (fx/merge cofx
{:db (update db :mailserver/current-request dissoc :attempts)} {:db (update db :mailserver/current-request dissoc :attempts)}
(log-mailserver-failure)
(change-mailserver)) (change-mailserver))
(let [mailserver (get-mailserver-when-ready cofx) (let [mailserver (get-mailserver-when-ready cofx)
offline? (= :offline (:network-status db))] offline? (= :offline (:network-status db))]

View File

@ -879,3 +879,20 @@
{:from 2 {:from 2
:to 8} :to 8}
#{"chat1"})))))) #{"chat1"}))))))
(deftest sort-mailserver-test
(testing "it orders them by whether they have failed first and by rtts"
(let [now (inc constants/cooloff-period)
mailserver-failures {:a 1
:b 0}
cofx {:now now
:db {:mailserver/failures mailserver-failures}}
mailserver-pings [{:address :a :rttMs 2}
{:address :b :rttMs 3}
{:address :d :rttMs 1}
{:address :e :rttMs 4}]
expected-order [{:address :d :rttMs 1}
{:address :b :rttMs 3}
{:address :e :rttMs 4}
{:address :a :rttMs 2}]]
(is (= expected-order (mailserver/sort-mailservers cofx mailserver-pings))))))