From 0c2908dbfb9edb2ab10ff9de571e00f3d85d07cd Mon Sep 17 00:00:00 2001 From: William Chargin Date: Wed, 22 Aug 2018 11:37:29 -0700 Subject: [PATCH] Retry GitHub queries with exponential backoff (#699) Summary: This patch adds independent exponential backoff to each individual GitHub GraphQL query. We remove the fixed `GITHUB_DELAY_MS` delay before each query in favor of this solution, which requires no additional configuration (thus resolving a TODO in the process). We use the NPM module `retry` with its default settings: namely, a maximum of 10 retries with factor-2 backoff starting at 1000ms. Empirically, it seems very unlikely that we should require much more than 2 retries for a query. (See Test Plan for more details.) This is both a short-term unblocker and a good kind of thing to have in the long term. Test Plan: Note that `yarn test --full` passes, including `fetchGithubRepoTest.sh`. Consider manual testing as follows. Add `console.info` statements in `retryGithubFetch`, then load a large repository like TensorFlow, and observe the output: ```shell $ node bin/sourcecred.js load --plugin github tensorflow/tensorflow 2>&1 | ts -s '%.s' 0.252566 Fetching repo... 0.258422 Trying... 5.203014 Trying... [snip] 1244.521197 Trying... 1254.848044 Will retry (n=1)... 1260.893334 Trying... 1271.547368 Trying... 1282.094735 Will retry (n=1)... 1283.349192 Will retry (n=2)... 1289.188728 Trying... [snip] 1741.026869 Ensuring no more pages... 1742.139978 Creating view... 1752.023697 Stringifying... 1754.697116 Writing... 1754.697772 Done. ``` This took just under half an hour, with 264 queries total, of which: - 225 queries required 0 retries; - 38 queries required exactly 1 retry; - 1 query required exactly 2 retries; and - 0 queries required 3 or more retries. wchargin-branch: github-backoff --- CHANGELOG.md | 1 + package.json | 1 + src/plugins/github/fetchGithubRepo.js | 107 +++++++++++++++++++++++--- yarn.lock | 4 + 4 files changed, 101 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c480974..9d8d57a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## [Unreleased] +- Execute GraphQL queries with exponential backoff (#699) - Introduce a simplified Git plugin that only tracks commits (#685) - Rename cred explorer table columns (#680) - Display version string in the app's footer diff --git a/package.json b/package.json index 28fab1b..e6763d6 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,7 @@ "react": "^16.4.1", "react-dom": "^16.4.1", "react-router": "3.2.1", + "retry": "^0.12.0", "tmp": "^0.0.33", "whatwg-fetch": "2.0.3" }, diff --git a/src/plugins/github/fetchGithubRepo.js b/src/plugins/github/fetchGithubRepo.js index 6d961fb..1bf799c 100644 --- a/src/plugins/github/fetchGithubRepo.js +++ b/src/plugins/github/fetchGithubRepo.js @@ -5,6 +5,7 @@ */ import fetch from "isomorphic-fetch"; +import retry from "retry"; import {stringify, inlineLayout} from "../../graphql/queries"; import {createQuery, createVariables, postQueryExhaustive} from "./graphql"; @@ -49,6 +50,75 @@ export default function fetchGithubRepo( const GITHUB_GRAPHQL_SERVER = "https://api.github.com/graphql"; +type GithubResponseError = + | {|+type: "FETCH_ERROR", retry: false, error: Error|} + | {|+type: "GRAPHQL_ERROR", retry: false, error: mixed|} + | {|+type: "GITHUB_INTERNAL_EXECUTION_ERROR", retry: true, error: mixed|} + | {|+type: "NO_DATA", retry: true, error: mixed|}; + +// Fetch against the GitHub API with the provided options, returning a +// promise that either resolves to the GraphQL result data or rejects +// to a `GithubResponseError`. +function tryGithubFetch(fetch, fetchOptions): Promise { + return fetch(GITHUB_GRAPHQL_SERVER, fetchOptions).then( + (x) => + x.json().then((x) => { + if (x.errors) { + if ( + x.errors.length === 1 && + x.errors[0].message.includes("it could be a GitHub bug") + ) { + return Promise.reject( + ({ + type: "GITHUB_INTERNAL_EXECUTION_ERROR", + retry: true, + error: x, + }: GithubResponseError) + ); + } else { + return Promise.reject( + ({ + type: "GRAPHQL_ERROR", + retry: false, + error: x, + }: GithubResponseError) + ); + } + } + if (x.data === undefined) { + // See https://github.com/sourcecred/sourcecred/issues/350. + return Promise.reject( + ({type: "NO_DATA", retry: true, error: x}: GithubResponseError) + ); + } + return Promise.resolve(x.data); + }), + (e) => + Promise.reject( + ({type: "FETCH_ERROR", retry: false, error: e}: GithubResponseError) + ) + ); +} + +function retryGithubFetch(fetch, fetchOptions) { + return new Promise((resolve, reject) => { + const operation = retry.operation(); + operation.attempt(() => { + tryGithubFetch(fetch, fetchOptions) + .then((result) => { + resolve(result); + }) + .catch((error) => { + if (error.retry && operation.retry(true)) { + return; + } else { + reject(error); + } + }); + }); + }); +} + async function postQuery({body, variables}, token): Promise { // TODO(#638): Find a more principled way to ingest this parameter. const delayMs: number = parseInt(process.env.GITHUB_DELAY_MS || "0", 10) || 0; @@ -57,20 +127,21 @@ async function postQuery({body, variables}, token): Promise { resolve(); }, delayMs); }); - const payload = { + const postBody = JSON.stringify({ query: stringify.body(body, inlineLayout()), variables: variables, - }; - return fetch(GITHUB_GRAPHQL_SERVER, { + }); + const fetchOptions = { method: "POST", - body: JSON.stringify(payload), + body: postBody, headers: { Authorization: `bearer ${token}`, }, - }) - .then((x) => x.json()) - .then((x) => { - if (x.errors || x.data === undefined) { + }; + return retryGithubFetch(fetch, fetchOptions).catch((error) => { + switch (error.type) { + case "GITHUB_INTERNAL_EXECUTION_ERROR": + case "NO_DATA": console.error( "GitHub query failed! We're tracking these issues at " + "https://github.com/sourcecred/sourcecred/issues/350.\n" + @@ -79,10 +150,22 @@ async function postQuery({body, variables}, token): Promise { "The actual failed response can be found below:\n" + "=================================================" ); - return Promise.reject(x); - } - return Promise.resolve(x.data); - }); + console.error(error.error); + break; + case "GRAPHQL_ERROR": + console.error( + "Unexpected GraphQL error; this may be a bug in SourceCred: ", + JSON.stringify({postBody: postBody, error: error.error}) + ); + break; + case "FETCH_ERROR": + // Network error; no need for additional commentary. + break; + default: + throw new Error((error.type: empty)); + } + return Promise.reject(error); + }); } function ensureNoMorePages(result: any, path = []) { diff --git a/yarn.lock b/yarn.lock index 965d4c7..36bc56a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6792,6 +6792,10 @@ ret@~0.1.10: version "0.1.15" resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc" +retry@^0.12.0: + version "0.12.0" + resolved "https://registry.yarnpkg.com/retry/-/retry-0.12.0.tgz#1b42a6266a21f07421d1b0b54b7dc167b01c013b" + right-align@^0.1.1: version "0.1.3" resolved "https://registry.yarnpkg.com/right-align/-/right-align-0.1.3.tgz#61339b722fe6a3515689210d24e14c96148613ef"