Fix flakey CI memory issues (#1230)

Ever since I upgraded all of the dependencies, we've been having regular CI failures, which seem to share a common root cause of memory exhaustion. Here are some examples: [1], [2]. [1]: https://circleci.com/gh/sourcecred/sourcecred/1246 [2]: https://circleci.com/gh/sourcecred/sourcecred/1239 After some experimentation, I've found that we can solve the issue by ensuring that jest runs on its own in CI, so that it doesn't contend with other tests for memories. Also, I reduce its max workers to 2, which matches the number of CPUs in the CircleCI containers. Unfortunately, this does increase our build time. The postcommit (non full) test now takes 45-60s (up from 30-50s), and the full test is also a little slower. However, building in about one minute is still acceptably fast, and having regular flakey test failures is not acceptable, so this is still a win. If we want to improve on this in the future, we should look into the git shells getting spawned in `config/env.js`. I noticed that they were often involved in the out-of-memory failures. Also, I modified `.circleci/config.yml` so that any branch matching the regular expression `/ci-.*/` will trigger a full build. That makes it easier to test against CI failures. Test plan: I ran about ~10 full builds with this change, and more with similar variations, and they all passed. Verify that the full builds that are run for this commit also all pass! Also, verify that running yarn test locally has unchanged behavior, and running `yarn test --ci` locally lets jest run to completion before running any other test.
2025-01-27 04:46:13 +00:00 · 2019-07-16 01:51:14 +01:00 · 2019-07-16 01:51:14 +01:00 · 78dc571cdc
commit 78dc571cdc
parent 7509a78f65
3 changed files with 43 additions and 16 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -36,19 +36,19 @@ jobs:
    steps:
      - checkout
      - set_up_node_modules
-      - run: yarn test
+      - run: yarn test --ci
  test_full:
    executor: node12
    steps:
      - checkout
      - set_up_node_modules
-      - run: yarn test --full
+      - run: yarn test --full --ci
  test_full_10:
    executor: node10
    steps:
      - checkout
      - set_up_node_modules
-      - run: yarn test --full
+      - run: yarn test --full --ci

 workflows:
  version: 2
@ -60,13 +60,13 @@ workflows:
            branches:
              only:
                - master
-                - ci-test
+                - /ci-.*/
      - test_full_10:
          filters:
            branches:
              only:
                - master
-                - ci-test
+                - /ci-.*/
  nightly:
    triggers:
      - schedule:
--- a/config/env.js
+++ b/config/env.js
@ -68,6 +68,10 @@ process.env.NODE_PATH = (process.env.NODE_PATH || "")
 // this optional. However, note that this computation is performed at
 // build time, so end users of SourceCred as a library or application
 // should not need this dependency.
+//
+// Note: Calling `getGitState` inside test code seems to increase memory usage,
+// potentially leading to CI out-of-memory failures. We should investigate
+// this, and consider making `getGitState` a compile step for test code.
 function getGitState() /*: GitState */ {
  const env = {
    GIT_ATTR_NOSYSTEM: "1",
--- a/config/test.js
+++ b/config/test.js
@ -10,18 +10,20 @@ function main() {
  const options = parseArgs();
  const printVerboseResults = options.mode === "FULL";
  const runOptions = {printVerboseResults};
-  const tasks = makeTasks(options.mode);
+  const tasks = makeTasks(options.mode, options.limitMemoryUsage);
  execDependencyGraph(tasks, runOptions).then(({success}) => {
    process.exitCode = success ? 0 : 1;
  });
 }

 function parseArgs() {
-  const options = {mode: "BASIC"};
+  const options = {mode: "BASIC", limitMemoryUsage: false};
  const args = process.argv.slice(2);
  for (const arg of args) {
    if (arg === "--full") {
      options.mode = "FULL";
+    } else if (arg === "--ci") {
+      options.limitMemoryUsage = true;
    } else {
      throw new Error("unknown argument: " + JSON.stringify(arg));
    }
@ -29,7 +31,10 @@ function parseArgs() {
  return options;
 }

-function makeTasks(mode /*: "BASIC" | "FULL" */) {
+function makeTasks(
+  mode /*: "BASIC" | "FULL" */,
+  limitMemoryUsage /*: boolean */
+) {
  const backendOutput = tmp.dirSync({
    unsafeCleanup: true,
    prefix: "sourcecred-test-",
@ -72,7 +77,7 @@ function makeTasks(mode /*: "BASIC" | "FULL" */) {
    },
    {
      id: "unit",
-      cmd: ["yarn", "run", "--silent", "unit", "--ci", "--maxWorkers=4"],
+      cmd: ["yarn", "run", "--silent", "unit", "--ci"],
      deps: [],
    },
    {
@ -129,12 +134,30 @@ function makeTasks(mode /*: "BASIC" | "FULL" */) {
      deps: ["backend"],
    },
  ];
-  switch (mode) {
-    case "BASIC":
-      return basicTasks;
-    case "FULL":
-      return [].concat(basicTasks, extraTasks);
-    default:
-      /*:: (mode: empty); */ throw new Error(mode);
+  const tasks = (function() {
+    switch (mode) {
+      case "BASIC":
+        return basicTasks;
+      case "FULL":
+        return [].concat(basicTasks, extraTasks);
+      default:
+        /*:: (mode: empty); */ throw new Error(mode);
+    }
+  })();
+  if (limitMemoryUsage) {
+    // We've had issues with our tests flakily failing in CI, due to apparent
+    // memory issues. I've found that if we both limit the maxWorkers to 2, and
+    // ensure that nothing else runs at the same time as jest, then it stops
+    // flakily failing.
+    tasks.forEach((task) => {
+      if (task.id === "unit") {
+        task.cmd.push("--maxWorkers=2");
+      } else {
+        // Ensure that everything else depends on unit tests, so unit tests
+        // will run first and run alone.
+        task.deps.push("unit");
+      }
+    });
  }
+  return tasks;
 }