Retry GitHub queries with exponential backoff (#699)

Summary:
This patch adds independent exponential backoff to each individual
GitHub GraphQL query. We remove the fixed `GITHUB_DELAY_MS` delay before
each query in favor of this solution, which requires no additional
configuration (thus resolving a TODO in the process).

We use the NPM module `retry` with its default settings: namely, a
maximum of 10 retries with factor-2 backoff starting at 1000ms.
Empirically, it seems very unlikely that we should require much more
than 2 retries for a query. (See Test Plan for more details.)

This is both a short-term unblocker and a good kind of thing to have in
the long term.

Test Plan:
Note that `yarn test --full` passes, including `fetchGithubRepoTest.sh`.
Consider manual testing as follows.

Add `console.info` statements in `retryGithubFetch`, then load a large
repository like TensorFlow, and observe the output:

```shell
$ node bin/sourcecred.js load --plugin github tensorflow/tensorflow 2>&1 | ts -s '%.s'
0.252566 Fetching repo...
0.258422 Trying...
5.203014 Trying...
[snip]
1244.521197 Trying...
1254.848044 Will retry (n=1)...
1260.893334 Trying...
1271.547368 Trying...
1282.094735 Will retry (n=1)...
1283.349192 Will retry (n=2)...
1289.188728 Trying...
[snip]
1741.026869 Ensuring no more pages...
1742.139978 Creating view...
1752.023697 Stringifying...
1754.697116 Writing...
1754.697772 Done.
```

This took just under half an hour, with 264 queries total, of which:
  - 225 queries required 0 retries;
  - 38 queries required exactly 1 retry;
  - 1 query required exactly 2 retries; and
  - 0 queries required 3 or more retries.

wchargin-branch: github-backoff
This commit is contained in:
William Chargin 2018-08-22 11:37:29 -07:00 committed by GitHub
parent d839fcae95
commit 0c2908dbfb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 101 additions and 12 deletions

View File

@ -1,6 +1,7 @@
# Changelog
## [Unreleased]
- Execute GraphQL queries with exponential backoff (#699)
- Introduce a simplified Git plugin that only tracks commits (#685)
- Rename cred explorer table columns (#680)
- Display version string in the app's footer

View File

@ -23,6 +23,7 @@
"react": "^16.4.1",
"react-dom": "^16.4.1",
"react-router": "3.2.1",
"retry": "^0.12.0",
"tmp": "^0.0.33",
"whatwg-fetch": "2.0.3"
},

View File

@ -5,6 +5,7 @@
*/
import fetch from "isomorphic-fetch";
import retry from "retry";
import {stringify, inlineLayout} from "../../graphql/queries";
import {createQuery, createVariables, postQueryExhaustive} from "./graphql";
@ -49,6 +50,75 @@ export default function fetchGithubRepo(
const GITHUB_GRAPHQL_SERVER = "https://api.github.com/graphql";
type GithubResponseError =
| {|+type: "FETCH_ERROR", retry: false, error: Error|}
| {|+type: "GRAPHQL_ERROR", retry: false, error: mixed|}
| {|+type: "GITHUB_INTERNAL_EXECUTION_ERROR", retry: true, error: mixed|}
| {|+type: "NO_DATA", retry: true, error: mixed|};
// Fetch against the GitHub API with the provided options, returning a
// promise that either resolves to the GraphQL result data or rejects
// to a `GithubResponseError`.
function tryGithubFetch(fetch, fetchOptions): Promise<any> {
return fetch(GITHUB_GRAPHQL_SERVER, fetchOptions).then(
(x) =>
x.json().then((x) => {
if (x.errors) {
if (
x.errors.length === 1 &&
x.errors[0].message.includes("it could be a GitHub bug")
) {
return Promise.reject(
({
type: "GITHUB_INTERNAL_EXECUTION_ERROR",
retry: true,
error: x,
}: GithubResponseError)
);
} else {
return Promise.reject(
({
type: "GRAPHQL_ERROR",
retry: false,
error: x,
}: GithubResponseError)
);
}
}
if (x.data === undefined) {
// See https://github.com/sourcecred/sourcecred/issues/350.
return Promise.reject(
({type: "NO_DATA", retry: true, error: x}: GithubResponseError)
);
}
return Promise.resolve(x.data);
}),
(e) =>
Promise.reject(
({type: "FETCH_ERROR", retry: false, error: e}: GithubResponseError)
)
);
}
function retryGithubFetch(fetch, fetchOptions) {
return new Promise((resolve, reject) => {
const operation = retry.operation();
operation.attempt(() => {
tryGithubFetch(fetch, fetchOptions)
.then((result) => {
resolve(result);
})
.catch((error) => {
if (error.retry && operation.retry(true)) {
return;
} else {
reject(error);
}
});
});
});
}
async function postQuery({body, variables}, token): Promise<any> {
// TODO(#638): Find a more principled way to ingest this parameter.
const delayMs: number = parseInt(process.env.GITHUB_DELAY_MS || "0", 10) || 0;
@ -57,20 +127,21 @@ async function postQuery({body, variables}, token): Promise<any> {
resolve();
}, delayMs);
});
const payload = {
const postBody = JSON.stringify({
query: stringify.body(body, inlineLayout()),
variables: variables,
};
return fetch(GITHUB_GRAPHQL_SERVER, {
});
const fetchOptions = {
method: "POST",
body: JSON.stringify(payload),
body: postBody,
headers: {
Authorization: `bearer ${token}`,
},
})
.then((x) => x.json())
.then((x) => {
if (x.errors || x.data === undefined) {
};
return retryGithubFetch(fetch, fetchOptions).catch((error) => {
switch (error.type) {
case "GITHUB_INTERNAL_EXECUTION_ERROR":
case "NO_DATA":
console.error(
"GitHub query failed! We're tracking these issues at " +
"https://github.com/sourcecred/sourcecred/issues/350.\n" +
@ -79,10 +150,22 @@ async function postQuery({body, variables}, token): Promise<any> {
"The actual failed response can be found below:\n" +
"================================================="
);
return Promise.reject(x);
}
return Promise.resolve(x.data);
});
console.error(error.error);
break;
case "GRAPHQL_ERROR":
console.error(
"Unexpected GraphQL error; this may be a bug in SourceCred: ",
JSON.stringify({postBody: postBody, error: error.error})
);
break;
case "FETCH_ERROR":
// Network error; no need for additional commentary.
break;
default:
throw new Error((error.type: empty));
}
return Promise.reject(error);
});
}
function ensureNoMorePages(result: any, path = []) {

View File

@ -6792,6 +6792,10 @@ ret@~0.1.10:
version "0.1.15"
resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc"
retry@^0.12.0:
version "0.12.0"
resolved "https://registry.yarnpkg.com/retry/-/retry-0.12.0.tgz#1b42a6266a21f07421d1b0b54b7dc167b01c013b"
right-align@^0.1.1:
version "0.1.3"
resolved "https://registry.yarnpkg.com/right-align/-/right-align-0.1.3.tgz#61339b722fe6a3515689210d24e14c96148613ef"