Automatically run pagerank on `sourcecred load` (#1115)

This commit updates the `sourcecred load` command so that it also
automatically runs PageRank on completion.

The implementation is slightly hacky, in that it prints two sets of
task status headers/footers to console, for reasons described in a
comment in the source code. The user-visible effect of this hack can
be seen below:

```
❯ node bin/sourcecred.js load sourcecred/example-github

Starting tasks
  GO   load-git
  GO   load-github
 DONE  load-github
 DONE  load-git

Overview
Final result:  SUCCESS

Starting tasks
  GO   run-pagerank
 DONE  run-pagerank

Overview
Final result:  SUCCESS
```

It would be good to clean this up, but for now I think it's acceptable.

Note that it is not safe to assume that a PagerankGraph always exists
for repos that are included in the RepoIdRegistry. The repo gets added
to the registry before the pagerank task runs. Consumers that are
loading the `PagerankGraph` can just check that the file exists, though.

Test plan: I've added unit tests that verify that the right tasks are
generated. Most importantly, the snapshot of the results of `sourcecred
load` now include a snapshotted pagerank graph.
(The snapshot was updated via `UPDATE_SNAPSHOT=1 yarn test --full`.)

Further progress on #967.
This commit is contained in:
Dandelion Mané 2019-04-11 21:21:29 +02:00 committed by GitHub
parent fb6c9e1ba0
commit 7efcc13618
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 80 additions and 18 deletions

File diff suppressed because one or more lines are too long

View File

@ -165,31 +165,58 @@ export function makeLoadCommand(
}
export const loadDefaultPlugins = async (options: LoadOptions) => {
const sourcecredCommand = (args) => [
process.execPath,
"--max_old_space_size=8192",
process.argv[1],
...args,
];
const tasks = [
...Common.defaultPlugins().map((pluginName) => ({
id: `load-${pluginName}`,
cmd: [
process.execPath,
"--max_old_space_size=8192",
process.argv[1],
cmd: sourcecredCommand([
"load",
...options.repoIds.map((repoId) => repoIdToString(repoId)),
"--output",
repoIdToString(options.output),
"--plugin",
pluginName,
],
]),
deps: [],
})),
];
const {success} = await execDependencyGraph(tasks, {taskPassLabel: "DONE"});
if (success) {
addToRepoIdRegistry(options.output);
return;
} else {
const {success: loadSuccess} = await execDependencyGraph(tasks, {
taskPassLabel: "DONE",
});
if (!loadSuccess) {
throw new Error("Load tasks failed.");
}
addToRepoIdRegistry(options.output);
// HACK: Logically, we should have the PagerankTask be included in the
// first execDependencyGraph run, depending on the other tasks completing.
//
// However, running pagerank depends on loading the graph
// (analysis/loadGraph), which depends on the relevant repo being present
// in the RepoIdRegistry. And it is only in the RepoIdRegistry after the
// call to execDependencyGraph has been successful.
//
// As a simple hack, we just call execDependencyGraph again with the
// pagerank command after the first one has been successful. This does have
// the awkward effect that CLI users will see two blocks of "task: SUCCESS"
// information from execDependencyGraph.
const pagerankTask = {
id: "run-pagerank",
cmd: sourcecredCommand(["pagerank", repoIdToString(options.output)]),
deps: [],
};
const {success: pagerankSuccess} = await execDependencyGraph([pagerankTask], {
taskPassLabel: "DONE",
});
if (!pagerankSuccess) {
throw new Error("Pagerank task failed.");
}
return;
};
export const loadIndividualPlugin = async (

View File

@ -464,18 +464,18 @@ describe("cli/load", () => {
const fooBaz = makeRepoId("foo", "baz");
it("creates a load sub-task per plugin", async () => {
execDependencyGraph.mockResolvedValueOnce({success: true});
execDependencyGraph.mockResolvedValue({success: true});
await loadDefaultPlugins({
output: fooCombined,
repoIds: [fooBar, fooBaz],
});
expect(execDependencyGraph).toHaveBeenCalledTimes(1);
const tasks = execDependencyGraph.mock.calls[0][0];
expect(tasks).toHaveLength(["git", "github"].length);
expect(tasks.map((task) => task.id)).toEqual(
expect(execDependencyGraph).toHaveBeenCalledTimes(2);
const loadTasks = execDependencyGraph.mock.calls[0][0];
expect(loadTasks).toHaveLength(["git", "github"].length);
expect(loadTasks.map((task) => task.id)).toEqual(
expect.arrayContaining(["load-git", "load-github"])
);
for (const task of tasks) {
for (const task of loadTasks) {
expect(task.cmd).toEqual([
expect.stringMatching(/\bnode\b/),
expect.stringMatching(/--max_old_space_size=/),
@ -491,12 +491,35 @@ describe("cli/load", () => {
}
});
it("creates a pagerank task after load is successful", async () => {
execDependencyGraph.mockResolvedValue({success: true});
await loadDefaultPlugins({
output: fooCombined,
repoIds: [fooBar, fooBaz],
});
expect(execDependencyGraph).toHaveBeenCalledTimes(2);
const pagerankTasks = execDependencyGraph.mock.calls[1][0];
expect(pagerankTasks).toHaveLength(1);
const task = pagerankTasks[0];
expect(task).toEqual({
id: "run-pagerank",
deps: [],
cmd: [
expect.stringMatching(/\bnode\b/),
expect.stringMatching(/--max_old_space_size=/),
process.argv[1],
"pagerank",
"foo/combined",
],
});
});
it("updates RepoIdRegistry on success", async () => {
const directory = newSourcecredDirectory();
expect(RepoIdRegistry.getRegistry(directory)).toEqual(
RepoIdRegistry.emptyRegistry()
);
execDependencyGraph.mockResolvedValueOnce({success: true});
execDependencyGraph.mockResolvedValue({success: true});
await loadDefaultPlugins({
output: fooCombined,
repoIds: [fooBar, fooBaz],
@ -510,7 +533,7 @@ describe("cli/load", () => {
expect(RepoIdRegistry.getRegistry(directory)).toEqual(expectedRegistry);
});
it("throws an error on execDependencyGraph failure", async () => {
it("throws an load error on first execDependencyGraph failure", async () => {
execDependencyGraph.mockResolvedValueOnce({success: false});
const result = loadDefaultPlugins({
output: fooCombined,
@ -519,5 +542,16 @@ describe("cli/load", () => {
expect(result).rejects.toThrow("Load tasks failed.");
});
it("throws an pagerank error on second execDependencyGraph failure", async () => {
execDependencyGraph.mockResolvedValueOnce({success: true});
execDependencyGraph.mockResolvedValueOnce({success: false});
const result = loadDefaultPlugins({
output: fooCombined,
repoIds: [fooBar, fooBaz],
});
expect(result).rejects.toThrow("Pagerank task failed.");
});
});
});