From 012f19eb48be35eaeac5dc3c9e98fcc7b3f61c3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Fri, 23 Aug 2019 17:51:35 +0200 Subject: [PATCH] discourse fetch: add rate limiting (#1321) This implements rate limiting to the Discourse fetch logic, so that we can actually load nontrivial servers without getting a 429 failure. We could have used retry; I thought it was more polite to actually limit the rate at which we make requests. However, to avoid seeing 429s in practice, I left a bit of a buffer: we make only 55 requests per minute, although 60 would be allowed. If we want to improve Discourse loading time, we could boost up to the full 60 request/min, but add in retries. (Or we could switch to retries entirely.) Test plan: This logic is untested, however my full discourse-plugin branch uses it to do full Discourse loads without issue. --- package.json | 1 + src/plugins/discourse/fetch.js | 29 +++++++++++++++++++++++++++-- src/plugins/discourse/fetch.test.js | 6 +++--- yarn.lock | 5 +++++ 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/package.json b/package.json index ce04502..f484760 100644 --- a/package.json +++ b/package.json @@ -6,6 +6,7 @@ "aphrodite": "^2.1.0", "base64url": "^3.0.1", "better-sqlite3": "^5.4.0", + "bottleneck": "^2.19.5", "chalk": "2.4.2", "commonmark": "^0.29.0", "d3-array": "^2.2.0", diff --git a/src/plugins/discourse/fetch.js b/src/plugins/discourse/fetch.js index 2558c28..a1336ee 100644 --- a/src/plugins/discourse/fetch.js +++ b/src/plugins/discourse/fetch.js @@ -12,6 +12,8 @@ import stringify from "json-stable-stringify"; import fetch from "isomorphic-fetch"; +import Bottleneck from "bottleneck"; +import * as NullUtil from "../../util/null"; export type UserId = number; export type PostId = number; @@ -90,17 +92,40 @@ export interface Discourse { likesByUser(targetUsername: string, offset: number): Promise; } +const MAX_API_REQUESTS_PER_MINUTE = 55; + export class Fetcher implements Discourse { + // 
We limit the rate of API requests, as documented here: + // https://meta.discourse.org/t/global-rate-limits-and-throttling-in-discourse/78612 + // Note this limit is for admin API keys. If we change to user API keys + // (would be convenient as the keys would be less sensitive), we will need to lower + // this rate limit by a factor of 3 + // TODO: I've set the max requests per minute to 55 (below the stated limit + // of 60) to be a bit conservative, and avoid getting limited by the server. + // We could improve our throughput by increasing the requests per minute to the + // stated limit, and incorporating retry logic to account for the occasional 429. + +options: DiscourseFetchOptions; +_fetchImplementation: typeof fetch; constructor( options: DiscourseFetchOptions, // fetchImplementation shouldn't be provided by clients, but is convenient for testing. - fetchImplementation?: typeof fetch + fetchImplementation?: typeof fetch, + // Used to avoid going over the Discourse API rate limit + minTimeMs?: number ) { this.options = options; - this._fetchImplementation = fetchImplementation || fetch; + const minTime = NullUtil.orElse( minTimeMs, (1000 * 60) / MAX_API_REQUESTS_PER_MINUTE ); + // n.b. the rate limiting isn't programmatically tested. However, it's easy + // to tell when it's broken: try to load a nontrivial Discourse server, and see + // if you get a 429 failure. 
+ const limiter = new Bottleneck({minTime}); + const unlimitedFetch = NullUtil.orElse(fetchImplementation, fetch); + this._fetchImplementation = limiter.wrap(unlimitedFetch); } _fetch(endpoint: string): Promise { diff --git a/src/plugins/discourse/fetch.test.js b/src/plugins/discourse/fetch.test.js index a94c069..fa170fc 100644 --- a/src/plugins/discourse/fetch.test.js +++ b/src/plugins/discourse/fetch.test.js @@ -27,7 +27,7 @@ describe("plugins/discourse/fetch", () => { throw new Error(`couldn't load snapshot for ${file}`); } } - const snapshotFetcher = () => new Fetcher(options, snapshotFetch); + const snapshotFetcher = () => new Fetcher(options, snapshotFetch, 0); it("loads LatestTopicId from snapshot", async () => { const topicId = await snapshotFetcher().latestTopicId(); @@ -55,7 +55,7 @@ describe("plugins/discourse/fetch", () => { return Promise.resolve(resp); }; const fetcherWithStatus = (status: number) => - new Fetcher(options, fakeFetch(status)); + new Fetcher(options, fakeFetch(status), 0); function expectError(name, f, status) { it(`${name} errors on ${String(status)}`, () => { const fetcher = fetcherWithStatus(status); @@ -96,7 +96,7 @@ describe("plugins/discourse/fetch", () => { fetchOptions = _options; return Promise.resolve(new Response("", {status: 404})); }; - await new Fetcher(options, fakeFetch).post(1337); + await new Fetcher(options, fakeFetch, 0).post(1337); if (fetchOptions == null) { throw new Error("fetchOptions == null"); } diff --git a/yarn.lock b/yarn.lock index 2a3dd65..d02ad35 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1799,6 +1799,11 @@ boolbase@~1.0.0: resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e" integrity sha1-aN/1++YMUes3cl6p4+0xDcwed24= +bottleneck@^2.19.5: + version "2.19.5" + resolved "https://registry.yarnpkg.com/bottleneck/-/bottleneck-2.19.5.tgz#5df0b90f59fd47656ebe63c78a98419205cadd91" + integrity 
sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw== + brace-expansion@^1.0.0, brace-expansion@^1.1.7: version "1.1.11" resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"