Fix flakey CI memory issues (#1230)

Ever since I upgraded all of the dependencies, we've been having
regular CI failures, which seem to share a common root cause of memory
exhaustion. Here are some examples: [1], [2].

[1]: https://circleci.com/gh/sourcecred/sourcecred/1246
[2]: https://circleci.com/gh/sourcecred/sourcecred/1239

After some experimentation, I've found that we can solve the
issue by ensuring that jest runs on its own in CI, so that it doesn't
contend with other tests for memories. Also, I reduce its max workers to
2, which matches the number of CPUs in the CircleCI containers.

Unfortunately, this does increase our build time. The postcommit (non
full) test now takes 45-60s (up from 30-50s), and the full test is also
a little slower. However, building in about one minute is still
acceptably fast, and having regular flakey test failures is not
acceptable, so this is still a win.

If we want to improve on this in the future, we should look into the git
shells getting spawned in `config/env.js`. I noticed that they were
often involved in the out-of-memory failures.

Also, I modified `.circleci/config.yml` so that any branch matching the
regular expression `/ci-.*/` will trigger a full build. That makes it
easier to test against CI failures.

Test plan: I ran about ~10 full builds with this change, and more with
similar variations, and they all passed. Verify that the full builds
that are run for this commit also all pass! Also, verify that running
yarn test locally has unchanged behavior, and running 
`yarn test --ci` locally lets jest run to completion before running
any other test.
This commit is contained in:
Dandelion Mané 2019-07-16 01:51:14 +01:00 committed by GitHub
parent 7509a78f65
commit 78dc571cdc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 43 additions and 16 deletions

View File

@ -36,19 +36,19 @@ jobs:
steps:
- checkout
- set_up_node_modules
- run: yarn test
- run: yarn test --ci
test_full:
executor: node12
steps:
- checkout
- set_up_node_modules
- run: yarn test --full
- run: yarn test --full --ci
test_full_10:
executor: node10
steps:
- checkout
- set_up_node_modules
- run: yarn test --full
- run: yarn test --full --ci
workflows:
version: 2
@ -60,13 +60,13 @@ workflows:
branches:
only:
- master
- ci-test
- /ci-.*/
- test_full_10:
filters:
branches:
only:
- master
- ci-test
- /ci-.*/
nightly:
triggers:
- schedule:

View File

@ -68,6 +68,10 @@ process.env.NODE_PATH = (process.env.NODE_PATH || "")
// this optional. However, note that this computation is performed at
// build time, so end users of SourceCred as a library or application
// should not need this dependency.
//
// Note: Calling `getGitState` inside test code seems to increase memory usage,
// potentially leading to CI out-of-memory failures. We should investigate
// this, and consider making `getGitState` a compile step for test code.
function getGitState() /*: GitState */ {
const env = {
GIT_ATTR_NOSYSTEM: "1",

View File

@ -10,18 +10,20 @@ function main() {
const options = parseArgs();
const printVerboseResults = options.mode === "FULL";
const runOptions = {printVerboseResults};
const tasks = makeTasks(options.mode);
const tasks = makeTasks(options.mode, options.limitMemoryUsage);
execDependencyGraph(tasks, runOptions).then(({success}) => {
process.exitCode = success ? 0 : 1;
});
}
function parseArgs() {
const options = {mode: "BASIC"};
const options = {mode: "BASIC", limitMemoryUsage: false};
const args = process.argv.slice(2);
for (const arg of args) {
if (arg === "--full") {
options.mode = "FULL";
} else if (arg === "--ci") {
options.limitMemoryUsage = true;
} else {
throw new Error("unknown argument: " + JSON.stringify(arg));
}
@ -29,7 +31,10 @@ function parseArgs() {
return options;
}
function makeTasks(mode /*: "BASIC" | "FULL" */) {
function makeTasks(
mode /*: "BASIC" | "FULL" */,
limitMemoryUsage /*: boolean */
) {
const backendOutput = tmp.dirSync({
unsafeCleanup: true,
prefix: "sourcecred-test-",
@ -72,7 +77,7 @@ function makeTasks(mode /*: "BASIC" | "FULL" */) {
},
{
id: "unit",
cmd: ["yarn", "run", "--silent", "unit", "--ci", "--maxWorkers=4"],
cmd: ["yarn", "run", "--silent", "unit", "--ci"],
deps: [],
},
{
@ -129,12 +134,30 @@ function makeTasks(mode /*: "BASIC" | "FULL" */) {
deps: ["backend"],
},
];
switch (mode) {
case "BASIC":
return basicTasks;
case "FULL":
return [].concat(basicTasks, extraTasks);
default:
/*:: (mode: empty); */ throw new Error(mode);
const tasks = (function() {
switch (mode) {
case "BASIC":
return basicTasks;
case "FULL":
return [].concat(basicTasks, extraTasks);
default:
/*:: (mode: empty); */ throw new Error(mode);
}
})();
if (limitMemoryUsage) {
// We've had issues with our tests flakily failing in CI, due to apparent
// memory issues. I've found that if we both limit the maxWorkers to 2, and
// ensure that nothing else runs at the same time as jest, then it stops
// flakily failing.
tasks.forEach((task) => {
if (task.id === "unit") {
task.cmd.push("--maxWorkers=2");
} else {
// Ensure that everything else depends on unit tests, so unit tests
// will run first and run alone.
task.deps.push("unit");
}
});
}
return tasks;
}