From 1b4dcb710377c0bf73eff38fdfe6e9dea9336b76 Mon Sep 17 00:00:00 2001 From: Andrea Maria Piana Date: Tue, 8 Dec 2020 16:31:31 +0100 Subject: [PATCH] [Fixes: #11520] Rotate mailservers on error conditions If a mailserver request is failing we disconnect at the 3rd attempt. Though once we are disconnected, we connect again to the same mailserver based on ping time. This commit changes the behavior so that mailservers are penalized if they are returning errors. Signed-off-by: Andrea Maria Piana --- src/status_im/mailserver/constants.cljs | 3 ++ src/status_im/mailserver/core.cljs | 49 ++++++++++++++++++++----- src/status_im/mailserver/core_test.cljs | 17 +++++++++ 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/src/status_im/mailserver/constants.cljs b/src/status_im/mailserver/constants.cljs index 9cf0e51bad..13f66016af 100644 --- a/src/status_im/mailserver/constants.cljs +++ b/src/status_im/mailserver/constants.cljs @@ -11,6 +11,9 @@ (def max-limit 1000) (def backoff-interval-ms 3000) (def default-limit max-limit) +;; If a mailserver fails, how long before we should consider them again +;; for selection, in ms +(def cooloff-period 120000) (def connection-timeout "Time after which mailserver connection is considered to have failed" 10000) diff --git a/src/status_im/mailserver/core.cljs b/src/status_im/mailserver/core.cljs index 216796f99a..f530ea178f 100644 --- a/src/status_im/mailserver/core.cljs +++ b/src/status_im/mailserver/core.cljs @@ -100,10 +100,11 @@ :method "admin_removePeer" :params [enode]} payload (.stringify js/JSON (clj->js args))] - (status/call-private-rpc payload - (handlers/response-handler - #(log/info "mailserver: remove-peer success" %) - #(log/error "mailserver: remove-peer error" %))))) + (when enode + (status/call-private-rpc payload + (handlers/response-handler + #(log/info "mailserver: remove-peer success" %) + #(log/error "mailserver: remove-peer error" %)))))) (re-frame/reg-fx :mailserver/add-peer @@ -272,6 +273,31 @@ :on-success #(re-frame/dispatch [::get-latency-callback %])}]})) +(fx/defn log-mailserver-failure [{:keys [db now]}] + (when-let [mailserver (fetch-current db)] + {:db (assoc-in db [:mailserver/failures (:address mailserver)] now)})) + +(defn sort-mailservers + "Sort mailservers sorts the mailservers by recent failures, and by rtt + for breaking ties" + [{:keys [now db]} mailservers] + (let [mailserver-failures (:mailserver/failures db) + sort-fn (fn [a b] + (let [failures-a (get mailserver-failures (:address a)) + failures-b (get mailserver-failures (:address b)) + has-a-failed? (boolean + (and failures-a (<= (- now failures-a) constants/cooloff-period))) + has-b-failed? (boolean + (and failures-b (<= (- now failures-b) constants/cooloff-period)))] + ;; If both have failed, or none of them, then compare rtt + (cond + (= has-a-failed? has-b-failed?) + (compare (:rttMs a) (:rttMs b)) + ;; Otherwise prefer the one that has not failed recently + has-a-failed? 1 + has-b-failed? -1)))] + (sort sort-fn mailservers))) + (fx/defn set-current-mailserver-with-lowest-latency "Picks a random mailserver amongs the ones with the lowest latency The results with error are ignored @@ -281,7 +307,7 @@ (let [successful-pings (remove :error latency-results)] (when (seq successful-pings) (let [address (-> (take (pool-size (count successful-pings)) - (sort-by :rttMs successful-pings)) + (sort-mailservers cofx successful-pings)) rand-nth :address) mailserver-id (mailserver-address->id db address)] @@ -575,10 +601,14 @@ (when (contains? db :multiaccount) (let [last-connection-attempt (:mailserver/last-connection-attempt db)] (when (and (fetch-use-mailservers? cofx) - (<= (- now last-connection-attempt))) - (fx/merge cofx - (when (not= :connected (:mailserver/state db)) - (change-mailserver))))))) + ;; We are not connected + (not= :connected (:mailserver/state db)) + ;; We either never tried to connect to this peer + (or (nil? last-connection-attempt) + ;; Or 30 seconds have passed and no luck + (<= (- now last-connection-attempt) (* constants/connection-timeout 3)))) + ;; Then we change mailserver + (change-mailserver cofx))))) (fx/defn reset-request-to [{:keys [db]}] @@ -974,6 +1004,7 @@ (:attempts current-request)) (fx/merge cofx {:db (update db :mailserver/current-request dissoc :attempts)} + (log-mailserver-failure) (change-mailserver)) (let [mailserver (get-mailserver-when-ready cofx) offline? (= :offline (:network-status db))] diff --git a/src/status_im/mailserver/core_test.cljs b/src/status_im/mailserver/core_test.cljs index b50619fcb0..e6fc78360e 100644 --- a/src/status_im/mailserver/core_test.cljs +++ b/src/status_im/mailserver/core_test.cljs @@ -879,3 +879,20 @@ {:from 2 :to 8} #{"chat1"})))))) + +(deftest sort-mailserver-test + (testing "it orders them by whether they have failed first and by rtts" + (let [now (inc constants/cooloff-period) + mailserver-failures {:a 1 + :b 0} + cofx {:now now + :db {:mailserver/failures mailserver-failures}} + mailserver-pings [{:address :a :rttMs 2} + {:address :b :rttMs 3} + {:address :d :rttMs 1} + {:address :e :rttMs 4}] + expected-order [{:address :d :rttMs 1} + {:address :b :rttMs 3} + {:address :e :rttMs 4} + {:address :a :rttMs 2}]] + (is (= expected-order (mailserver/sort-mailservers cofx mailserver-pings))))))