Automatically run pagerank on `sourcecred load` (#1115)
This commit updates the `sourcecred load` command so that it also automatically runs PageRank on completion. The implementation is slightly hacky, in that it prints two sets of task status headers/footers to console, for reasons described in a comment in the source code. The user-visible effect of this hack can be seen below: ``` ❯ node bin/sourcecred.js load sourcecred/example-github Starting tasks GO load-git GO load-github DONE load-github DONE load-git Overview Final result: SUCCESS Starting tasks GO run-pagerank DONE run-pagerank Overview Final result: SUCCESS ``` It would be good to clean this up, but for now I think it's acceptable. Note that it is not safe to assume that a PagerankGraph always exists for repos that are included in the RepoIdRegistry. The repo gets added to the registry before the pagerank task runs. Consumers that are loading the `PagerankGraph` can just check that the file exists, though. Test plan: I've added unit tests that verify that the right tasks are generated. Most importantly, the snapshot of the results of `sourcecred load` now include a snapshotted pagerank graph. (The snapshot was updated via `UPDATE_SNAPSHOT=1 yarn test --full`.) Further progress on #967.
This commit is contained in:
parent
fb6c9e1ba0
commit
7efcc13618
File diff suppressed because one or more lines are too long
|
@ -165,31 +165,58 @@ export function makeLoadCommand(
|
|||
}
|
||||
|
||||
export const loadDefaultPlugins = async (options: LoadOptions) => {
|
||||
const tasks = [
|
||||
...Common.defaultPlugins().map((pluginName) => ({
|
||||
id: `load-${pluginName}`,
|
||||
cmd: [
|
||||
const sourcecredCommand = (args) => [
|
||||
process.execPath,
|
||||
"--max_old_space_size=8192",
|
||||
process.argv[1],
|
||||
...args,
|
||||
];
|
||||
const tasks = [
|
||||
...Common.defaultPlugins().map((pluginName) => ({
|
||||
id: `load-${pluginName}`,
|
||||
cmd: sourcecredCommand([
|
||||
"load",
|
||||
...options.repoIds.map((repoId) => repoIdToString(repoId)),
|
||||
"--output",
|
||||
repoIdToString(options.output),
|
||||
"--plugin",
|
||||
pluginName,
|
||||
],
|
||||
]),
|
||||
deps: [],
|
||||
})),
|
||||
];
|
||||
|
||||
const {success} = await execDependencyGraph(tasks, {taskPassLabel: "DONE"});
|
||||
if (success) {
|
||||
addToRepoIdRegistry(options.output);
|
||||
return;
|
||||
} else {
|
||||
const {success: loadSuccess} = await execDependencyGraph(tasks, {
|
||||
taskPassLabel: "DONE",
|
||||
});
|
||||
if (!loadSuccess) {
|
||||
throw new Error("Load tasks failed.");
|
||||
}
|
||||
addToRepoIdRegistry(options.output);
|
||||
// HACK: Logically, we should have the PagerankTask be included in the
|
||||
// first execDependencyGraph run, depending on the other tasks completing.
|
||||
//
|
||||
// However, running pagerank depends on loading the graph
|
||||
// (analysis/loadGraph), which depends on the relevant repo being present
|
||||
// in the RepoIdRegistry. And it is only in the RepoIdRegistry after the
|
||||
// call to execDependencyGraph has been successful.
|
||||
//
|
||||
// As a simple hack, we just call execDependencyGraph again with the
|
||||
// pagerank command after the first one has been successful. This does have
|
||||
// the awkward effect that CLI users will see two blocks of "task: SUCCESS"
|
||||
// information from execDependencyGraph.
|
||||
const pagerankTask = {
|
||||
id: "run-pagerank",
|
||||
cmd: sourcecredCommand(["pagerank", repoIdToString(options.output)]),
|
||||
deps: [],
|
||||
};
|
||||
const {success: pagerankSuccess} = await execDependencyGraph([pagerankTask], {
|
||||
taskPassLabel: "DONE",
|
||||
});
|
||||
if (!pagerankSuccess) {
|
||||
throw new Error("Pagerank task failed.");
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
||||
export const loadIndividualPlugin = async (
|
||||
|
|
|
@ -464,18 +464,18 @@ describe("cli/load", () => {
|
|||
const fooBaz = makeRepoId("foo", "baz");
|
||||
|
||||
it("creates a load sub-task per plugin", async () => {
|
||||
execDependencyGraph.mockResolvedValueOnce({success: true});
|
||||
execDependencyGraph.mockResolvedValue({success: true});
|
||||
await loadDefaultPlugins({
|
||||
output: fooCombined,
|
||||
repoIds: [fooBar, fooBaz],
|
||||
});
|
||||
expect(execDependencyGraph).toHaveBeenCalledTimes(1);
|
||||
const tasks = execDependencyGraph.mock.calls[0][0];
|
||||
expect(tasks).toHaveLength(["git", "github"].length);
|
||||
expect(tasks.map((task) => task.id)).toEqual(
|
||||
expect(execDependencyGraph).toHaveBeenCalledTimes(2);
|
||||
const loadTasks = execDependencyGraph.mock.calls[0][0];
|
||||
expect(loadTasks).toHaveLength(["git", "github"].length);
|
||||
expect(loadTasks.map((task) => task.id)).toEqual(
|
||||
expect.arrayContaining(["load-git", "load-github"])
|
||||
);
|
||||
for (const task of tasks) {
|
||||
for (const task of loadTasks) {
|
||||
expect(task.cmd).toEqual([
|
||||
expect.stringMatching(/\bnode\b/),
|
||||
expect.stringMatching(/--max_old_space_size=/),
|
||||
|
@ -491,12 +491,35 @@ describe("cli/load", () => {
|
|||
}
|
||||
});
|
||||
|
||||
it("creates a pagerank task after load is successful", async () => {
|
||||
execDependencyGraph.mockResolvedValue({success: true});
|
||||
await loadDefaultPlugins({
|
||||
output: fooCombined,
|
||||
repoIds: [fooBar, fooBaz],
|
||||
});
|
||||
expect(execDependencyGraph).toHaveBeenCalledTimes(2);
|
||||
const pagerankTasks = execDependencyGraph.mock.calls[1][0];
|
||||
expect(pagerankTasks).toHaveLength(1);
|
||||
const task = pagerankTasks[0];
|
||||
expect(task).toEqual({
|
||||
id: "run-pagerank",
|
||||
deps: [],
|
||||
cmd: [
|
||||
expect.stringMatching(/\bnode\b/),
|
||||
expect.stringMatching(/--max_old_space_size=/),
|
||||
process.argv[1],
|
||||
"pagerank",
|
||||
"foo/combined",
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it("updates RepoIdRegistry on success", async () => {
|
||||
const directory = newSourcecredDirectory();
|
||||
expect(RepoIdRegistry.getRegistry(directory)).toEqual(
|
||||
RepoIdRegistry.emptyRegistry()
|
||||
);
|
||||
execDependencyGraph.mockResolvedValueOnce({success: true});
|
||||
execDependencyGraph.mockResolvedValue({success: true});
|
||||
await loadDefaultPlugins({
|
||||
output: fooCombined,
|
||||
repoIds: [fooBar, fooBaz],
|
||||
|
@ -510,7 +533,7 @@ describe("cli/load", () => {
|
|||
expect(RepoIdRegistry.getRegistry(directory)).toEqual(expectedRegistry);
|
||||
});
|
||||
|
||||
it("throws an error on execDependencyGraph failure", async () => {
|
||||
it("throws an load error on first execDependencyGraph failure", async () => {
|
||||
execDependencyGraph.mockResolvedValueOnce({success: false});
|
||||
const result = loadDefaultPlugins({
|
||||
output: fooCombined,
|
||||
|
@ -519,5 +542,16 @@ describe("cli/load", () => {
|
|||
|
||||
expect(result).rejects.toThrow("Load tasks failed.");
|
||||
});
|
||||
|
||||
it("throws an pagerank error on second execDependencyGraph failure", async () => {
|
||||
execDependencyGraph.mockResolvedValueOnce({success: true});
|
||||
execDependencyGraph.mockResolvedValueOnce({success: false});
|
||||
const result = loadDefaultPlugins({
|
||||
output: fooCombined,
|
||||
repoIds: [fooBar, fooBaz],
|
||||
});
|
||||
|
||||
expect(result).rejects.toThrow("Pagerank task failed.");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue