From 012f19eb48be35eaeac5dc3c9e98fcc7b3f61c3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Fri, 23 Aug 2019 17:51:35 +0200 Subject: [PATCH] discourse fetch: add rate limiting (#1321) This implements rate limiting to the Discourse fetch logic, so that we can actually load nontrivial servers without getting a 429 failure. We could have used retry; I thought it was more polite to actually limit the rate at which we make requests. However, to avoid seeing 429s in practice, I left a bit of a buffer: we make only 55 requests per minute, although 60 would be allowed. If we want to improve Discourse loading time, we could boost up to the full 60 request/min, but add in retries. (Or we could switch to retries entirely.) Test plan: This logic is untested, however my full discourse-plugin branch uses it to do full Discourse loads without issue. --- package.json | 1 + src/plugins/discourse/fetch.js | 29 +++++++++++++++++++++++++++-- src/plugins/discourse/fetch.test.js | 6 +++--- yarn.lock | 5 +++++ 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/package.json b/package.json index ce04502..f484760 100644 --- a/package.json +++ b/package.json @@ -6,6 +6,7 @@ "aphrodite": "^2.1.0", "base64url": "^3.0.1", "better-sqlite3": "^5.4.0", + "bottleneck": "^2.19.5", "chalk": "2.4.2", "commonmark": "^0.29.0", "d3-array": "^2.2.0", diff --git a/src/plugins/discourse/fetch.js b/src/plugins/discourse/fetch.js index 2558c28..a1336ee 100644 --- a/src/plugins/discourse/fetch.js +++ b/src/plugins/discourse/fetch.js @@ -12,6 +12,8 @@ import stringify from "json-stable-stringify"; import fetch from "isomorphic-fetch"; +import Bottleneck from "bottleneck"; +import * as NullUtil from "../../util/null"; export type UserId = number; export type PostId = number; @@ -90,17 +92,40 @@ export interface Discourse { likesByUser(targetUsername: string, offset: number): Promise; } +const MAX_API_REQUESTS_PER_MINUTE = 55; + export class Fetcher implements Discourse { + // 
We limit the rate of API requests, as documented here: + // https://meta.discourse.org/t/global-rate-limits-and-throttling-in-discourse/78612 + // Note this limit is for admin API keys. If we change to user API keys + // (would be convenient as the keys would be less sensitive), we will need to lower + // this rate limit by a factor of 3 + // TODO: I've set the max requests per minute to 55 (below the stated limit + // of 60) to be a bit conservative, and avoid getting limited by the server. + // We could improve our throughput by increasing the requests per minute to the + // stated limit, and incorporating retry logic to account for the occasional 429. + +options: DiscourseFetchOptions; +_fetchImplementation: typeof fetch; constructor( options: DiscourseFetchOptions, // fetchImplementation shouldn't be provided by clients, but is convenient for testing. - fetchImplementation?: typeof fetch + fetchImplementation?: typeof fetch, + // Used to avoid going over the Discourse API rate limit + minTimeMs?: number ) { this.options = options; - this._fetchImplementation = fetchImplementation || fetch; + const minTime = NullUtil.orElse( minTimeMs, (1000 * 60) / MAX_API_REQUESTS_PER_MINUTE ); + // n.b. the rate limiting isn't programmatically tested. However, it's easy + // to tell when it's broken: try to load a nontrivial Discourse server, and see + // if you get a 429 failure. 
+ const limiter = new Bottleneck({minTime}); + const unlimitedFetch = NullUtil.orElse(fetchImplementation, fetch); + this._fetchImplementation = limiter.wrap(unlimitedFetch); } _fetch(endpoint: string): Promise { diff --git a/src/plugins/discourse/fetch.test.js b/src/plugins/discourse/fetch.test.js index a94c069..fa170fc 100644 --- a/src/plugins/discourse/fetch.test.js +++ b/src/plugins/discourse/fetch.test.js @@ -27,7 +27,7 @@ describe("plugins/discourse/fetch", () => { throw new Error(`couldn't load snapshot for ${file}`); } } - const snapshotFetcher = () => new Fetcher(options, snapshotFetch); + const snapshotFetcher = () => new Fetcher(options, snapshotFetch, 0); it("loads LatestTopicId from snapshot", async () => { const topicId = await snapshotFetcher().latestTopicId(); @@ -55,7 +55,7 @@ describe("plugins/discourse/fetch", () => { return Promise.resolve(resp); }; const fetcherWithStatus = (status: number) => - new Fetcher(options, fakeFetch(status)); + new Fetcher(options, fakeFetch(status), 0); function expectError(name, f, status) { it(`${name} errors on ${String(status)}`, () => { const fetcher = fetcherWithStatus(status); @@ -96,7 +96,7 @@ describe("plugins/discourse/fetch", () => { fetchOptions = _options; return Promise.resolve(new Response("", {status: 404})); }; - await new Fetcher(options, fakeFetch).post(1337); + await new Fetcher(options, fakeFetch, 0).post(1337); if (fetchOptions == null) { throw new Error("fetchOptions == null"); } diff --git a/yarn.lock b/yarn.lock index 2a3dd65..d02ad35 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1799,6 +1799,11 @@ boolbase@~1.0.0: resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e" integrity sha1-aN/1++YMUes3cl6p4+0xDcwed24= +bottleneck@^2.19.5: + version "2.19.5" + resolved "https://registry.yarnpkg.com/bottleneck/-/bottleneck-2.19.5.tgz#5df0b90f59fd47656ebe63c78a98419205cadd91" + integrity 
sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw== + brace-expansion@^1.0.0, brace-expansion@^1.1.7: version "1.1.11" resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"