Add sourcecred pagerank for backend pagerank (#1114)

This commit adds a new CLI command, `pagerank`, which runs PageRank on a
given repository. At present, the command only ever uses the default
weights, although I plan to make this configurable in the future. The
command then saves the resultant pagerank graph in the SourceCred
directory.

On its own, this command is not yet very compelling, as it doesn't
present any easily-consumed information (e.g. users' scores). However,
it is the first step for building other commands which do just that. My
intention is to make running this command the last step of `sourcecred
load`, so that future commands may assume the existence of pagerank
scores for any loaded repository.

Test plan: The new command is thoroughly tested; see
`cli/pagerank.test.js`. It also has nearly perfect code coverage (one
line missing, the dependency-injected real function for loading graphs).

Additionally, the following sequence of commands works:
```
$ yarn backend
$ node bin/sourcecred.js load sourcecred/pm
$ node bin/sourcecred.js pagerank sourcecred/pm
$ cat $SOURCECRED_DIRECTORY/data/sourcecred/pm/pagerankGraph.json
```

Material progress on #967.
This commit is contained in:
Dandelion Mané 2019-03-25 18:05:58 -07:00 committed by GitHub
parent 669f34d009
commit 012c4f3eb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 521 additions and 0 deletions

View File

@ -1,6 +1,7 @@
# Changelog # Changelog
## [Unreleased] ## [Unreleased]
- Add the `pagerank` command (#1114)
- Add description tooltips for node and edge types in the weight configuration UI (#1081) - Add description tooltips for node and edge types in the weight configuration UI (#1081)
- Add the `export-graph` command (#1110) - Add the `export-graph` command (#1110)
- Enable loading private repositories (#1085) - Enable loading private repositories (#1085)

View File

@ -6,6 +6,7 @@ import dedent from "../util/dedent";
import {help as loadHelp} from "./load"; import {help as loadHelp} from "./load";
import {help as analyzeHelp} from "./analyze"; import {help as analyzeHelp} from "./analyze";
import {help as pagerankHelp} from "./pagerank";
import {help as exportGraphHelp} from "./exportGraph"; import {help as exportGraphHelp} from "./exportGraph";
const help: Command = async (args, std) => { const help: Command = async (args, std) => {
@ -18,6 +19,7 @@ const help: Command = async (args, std) => {
help: metaHelp, help: metaHelp,
load: loadHelp, load: loadHelp,
analyze: analyzeHelp, analyze: analyzeHelp,
pagerank: pagerankHelp,
"export-graph": exportGraphHelp, "export-graph": exportGraphHelp,
}; };
if (subHelps[command] !== undefined) { if (subHelps[command] !== undefined) {
@ -40,6 +42,7 @@ function usage(print: (string) => void): void {
load load repository data into SourceCred load load repository data into SourceCred
analyze analyze cred for a loaded repository analyze analyze cred for a loaded repository
export-graph print a raw SourceCred graph export-graph print a raw SourceCred graph
pagerank recompute cred scores
help show this help message help show this help message
Use 'sourcecred help COMMAND' for help about an individual command. Use 'sourcecred help COMMAND' for help about an individual command.

210
src/cli/pagerank.js Normal file
View File

@ -0,0 +1,210 @@
// @flow
// Implementation of `sourcecred pagerank`.
import fs from "fs-extra";
import path from "path";
import {Graph} from "../core/graph";
import {
PagerankGraph,
DEFAULT_SYNTHETIC_LOOP_WEIGHT,
DEFAULT_CONVERGENCE_THRESHOLD,
DEFAULT_MAX_ITERATIONS,
} from "../core/pagerankGraph";
import {repoIdToString, stringToRepoId, type RepoId} from "../core/repoId";
import dedent from "../util/dedent";
import type {Command} from "./command";
import * as Common from "./common";
import stringify from "json-stable-stringify";
import {loadGraph, type LoadGraphResult} from "../analysis/loadGraph";
import {
type WeightedTypes,
combineWeights,
defaultWeightsForDeclaration,
} from "../analysis/weights";
import {weightsToEdgeEvaluator} from "../analysis/weightsToEdgeEvaluator";
import type {IAnalysisAdapter} from "../analysis/analysisAdapter";
import {AnalysisAdapter as GithubAnalysisAdapter} from "../plugins/github/analysisAdapter";
import {AnalysisAdapter as GitAnalysisAdapter} from "../plugins/git/analysisAdapter";
import {FallbackAdapter} from "../analysis/fallbackAdapter";
function usage(print: (string) => void): void {
print(
dedent`\
usage: sourcecred pagerank REPO_ID [--help]
Runs PageRank for a given REPO_ID, and saves the resultant
PagerankGraph to the SOURCECRED_DIRECTORY. Data must already
be loaded for the given REPO_ID, using 'sourcecred load REPO_ID'.
PageRank is always run with the default plugin weights. We expect
to make the weights configurable in the future.
REPO_ID refers to a GitHub repository in the form OWNER/NAME: for
example, torvalds/linux. The REPO_ID may be a "combined" repo as
created by the --output flag to sourcecred load.
Running this command may take a lot of heap. If it fails with an
out of memory (OOM) issue, try manually increasing the heap size
by passing the following argument to node process:
'--max_old_space_size=8192'
as in:
'node --max_old_space_size=8192 bin/sourcecred.js pagerank ...'
Arguments:
REPO_ID
Already-loaded repository for which to load data.
--help
Show this help message and exit, as 'sourcecred help pagerank'.
Environment Variables:
SOURCECRED_DIRECTORY
Directory owned by SourceCred, in which data, caches,
registries, etc. are stored. Optional: defaults to a
directory 'sourcecred' under your OS's temporary directory;
namely:
${Common.defaultSourcecredDirectory()}
`.trimRight()
);
}
/**
 * Report a fatal error on `std.err`, followed by a pointer to the help
 * command, and hand back the exit code (always 1) so that callers can
 * write `return die(std, "...")`.
 */
function die(std, message) {
  const lines = [
    "fatal: " + message,
    "fatal: run 'sourcecred help pagerank' for help",
  ];
  lines.forEach((line) => std.err(line));
  return 1;
}
/**
* Harness to create a Pagerank CLI command.
* It's factored so as to make it easy to test the CLI bits, separately
* from the core logic.
* It takes a `loader`, which loads the graph corresponding to a RepoId,
* a `pagerankRunner` which runs pagerank on that graph, and a `saver`
* which is responsible for saving the resultant PagerankGraph to disk.
*/
export function makePagerankCommand(
loadGraph: (RepoId) => Promise<LoadGraphResult>,
runPagerank: (Graph) => Promise<PagerankGraph>,
savePagerankGraph: (RepoId, PagerankGraph) => Promise<void>
): Command {
return async function pagerank(args, std) {
let repoId: RepoId | null = null;
for (let i = 0; i < args.length; i++) {
switch (args[i]) {
case "--help": {
usage(std.out);
return 0;
}
default: {
if (repoId != null) {
return die(std, "multiple repository IDs provided");
}
// Should be a repository.
repoId = stringToRepoId(args[i]);
break;
}
}
}
if (repoId == null) {
return die(std, "no repository ID provided");
}
const result: LoadGraphResult = await loadGraph(repoId);
switch (result.status) {
case "REPO_NOT_LOADED": {
const repoIdStr = repoIdToString(repoId);
std.err(`fatal: repository ID ${repoIdStr} not loaded`);
std.err(`Try running \`sourcecred load ${repoIdStr}\` first.`);
return 1;
}
case "PLUGIN_FAILURE": {
std.err(
`fatal: plugin "${result.pluginName}" errored: ${
result.error.message
}`
);
return 1;
}
case "SUCCESS": {
const pagerankGraph = await runPagerank(result.graph);
await savePagerankGraph(repoId, pagerankGraph);
return 0;
}
// istanbul ignore next: unreachable per Flow
default: {
std.err(`Unexpected status: ${(result.status: empty)}`);
return 1;
}
}
};
}
export async function runPagerank(
weights: WeightedTypes,
graph: Graph
): Promise<PagerankGraph> {
const evaluator = weightsToEdgeEvaluator(weights);
const pagerankGraph = new PagerankGraph(
graph,
evaluator,
DEFAULT_SYNTHETIC_LOOP_WEIGHT
);
await pagerankGraph.runPagerank({
maxIterations: DEFAULT_MAX_ITERATIONS,
convergenceThreshold: DEFAULT_CONVERGENCE_THRESHOLD,
});
return pagerankGraph;
}
export async function savePagerankGraph(
directory: string,
repoId: RepoId,
pg: PagerankGraph
): Promise<void> {
const pgJSON = pg.toJSON();
const pgDir = path.join(directory, "data", repoIdToString(repoId));
await fs.ensureDir(pgDir);
const pgFile = path.join(pgDir, "pagerankGraph.json");
await fs.writeFile(pgFile, stringify(pgJSON));
}
function weightsForAdapters(
adapters: $ReadOnlyArray<IAnalysisAdapter>
): WeightedTypes {
const declarations = adapters.map((a) => a.declaration());
return combineWeights(declarations.map(defaultWeightsForDeclaration));
}
export const defaultAdapters = () => [
new GithubAnalysisAdapter(),
new GitAnalysisAdapter(),
new FallbackAdapter(),
];
const defaultLoader = (r: RepoId) =>
loadGraph(Common.sourcecredDirectory(), defaultAdapters(), r);
export const defaultWeights = () => weightsForAdapters(defaultAdapters());
export const defaultPagerank = (g: Graph) => runPagerank(defaultWeights(), g);
export const defaultSaver = (r: RepoId, pg: PagerankGraph) =>
savePagerankGraph(Common.sourcecredDirectory(), r, pg);
export const pagerankCommand = makePagerankCommand(
defaultLoader,
defaultPagerank,
defaultSaver
);
export const help: Command = async (args, std) => {
if (args.length === 0) {
usage(std.out);
return 0;
} else {
usage(std.err);
return 1;
}
};
export default pagerankCommand;

295
src/cli/pagerank.test.js Normal file
View File

@ -0,0 +1,295 @@
// @flow
import tmp from "tmp";
import path from "path";
import fs from "fs-extra";
import {run} from "./testUtil";
import * as NullUtil from "../util/null";
import {
help,
makePagerankCommand,
savePagerankGraph,
runPagerank,
defaultWeights,
defaultPagerank,
defaultAdapters,
defaultSaver,
} from "./pagerank";
import {Graph, NodeAddress, EdgeAddress} from "../core/graph";
import {advancedGraph} from "../core/graphTestUtil";
import {
PagerankGraph,
DEFAULT_SYNTHETIC_LOOP_WEIGHT,
DEFAULT_CONVERGENCE_THRESHOLD,
DEFAULT_MAX_ITERATIONS,
} from "../core/pagerankGraph";
import type {NodeType, EdgeType} from "../analysis/types";
import {fallbackDeclaration} from "../analysis/fallbackDeclaration";
import {
defaultWeightsForDeclaration,
combineWeights,
} from "../analysis/weights";
import {weightsToEdgeEvaluator} from "../analysis/weightsToEdgeEvaluator";
import {makeRepoId, repoIdToString} from "../core/repoId";
describe("cli/pagerank", () => {
describe("'help' command", () => {
  // Matcher for an output stream that includes the usage banner line.
  const containsUsage = () =>
    expect.arrayContaining([
      expect.stringMatching(/^usage: sourcecred pagerank/),
    ]);

  it("prints usage when given no arguments", async () => {
    const outcome = await run(help, []);
    expect(outcome).toEqual({
      exitCode: 0,
      stdout: containsUsage(),
      stderr: [],
    });
  });

  it("fails when given arguments", async () => {
    const outcome = await run(help, ["foo/bar"]);
    expect(outcome).toEqual({
      exitCode: 1,
      stdout: [],
      stderr: containsUsage(),
    });
  });
});
describe("'pagerank' command", () => {
  // Argument handling: these cases return before any dependency's
  // result is used, so all three dependencies are bare mocks.
  it("prints usage with '--help'", async () => {
    const command = makePagerankCommand(jest.fn(), jest.fn(), jest.fn());
    const outcome = await run(command, ["--help"]);
    expect(outcome).toEqual({
      exitCode: 0,
      stdout: expect.arrayContaining([
        expect.stringMatching(/^usage: sourcecred pagerank/),
      ]),
      stderr: [],
    });
  });

  it("errors if no repoId is provided", async () => {
    const command = makePagerankCommand(jest.fn(), jest.fn(), jest.fn());
    const outcome = await run(command, []);
    expect(outcome).toEqual({
      exitCode: 1,
      stdout: [],
      stderr: expect.arrayContaining([
        "fatal: no repository ID provided",
        "fatal: run 'sourcecred help pagerank' for help",
      ]),
    });
  });

  it("errors if multiple repos are provided", async () => {
    const command = makePagerankCommand(jest.fn(), jest.fn(), jest.fn());
    const outcome = await run(command, ["foo/bar", "zod/zoink"]);
    expect(outcome).toEqual({
      exitCode: 1,
      stdout: [],
      stderr: [
        "fatal: multiple repository IDs provided",
        "fatal: run 'sourcecred help pagerank' for help",
      ],
    });
  });

  // Load failures: the loader is stubbed to resolve to a failure status.
  it("errors if the repoId was not loaded first", async () => {
    const notLoaded = {status: "REPO_NOT_LOADED"};
    const stubLoader = () => new Promise((resolve) => resolve(notLoaded));
    const command = makePagerankCommand(stubLoader, jest.fn(), jest.fn());
    const pending = run(command, ["zod/zoink"]);
    expect(await pending).toEqual({
      exitCode: 1,
      stdout: [],
      stderr: [
        "fatal: repository ID zod/zoink not loaded",
        "Try running `sourcecred load zod/zoink` first.",
      ],
    });
  });

  it("passes the right arguments to loadGraph", async () => {
    const loaderSpy = jest.fn();
    const command = makePagerankCommand(loaderSpy, jest.fn(), jest.fn());
    const expectedRepoId = makeRepoId("foo", "bar");
    await run(command, ["foo/bar"]);
    expect(loaderSpy).toHaveBeenCalledWith(expectedRepoId);
  });

  it("prints a message if there was a plugin failure", async () => {
    const pluginFailure = {
      status: "PLUGIN_FAILURE",
      pluginName: "foo",
      error: new Error("FooError"),
    };
    const stubLoader = (_unused_repoId) =>
      new Promise((resolve) => resolve(pluginFailure));
    const command = makePagerankCommand(stubLoader, jest.fn(), jest.fn());
    const outcome = await run(command, ["foo/bar"]);
    expect(outcome).toEqual({
      exitCode: 1,
      stdout: [],
      stderr: ['fatal: plugin "foo" errored: FooError'],
    });
  });

  describe("on successful load", () => {
    // Factories rather than shared instances, so that each test (and
    // each expectation) works with freshly built values.
    const makeGraph = () => new Graph().addNode(NodeAddress.empty);
    const makeLoadResult = () => ({status: "SUCCESS", graph: makeGraph()});
    const stubLoader = (_unused_repoId) =>
      new Promise((resolve) => resolve(makeLoadResult()));
    const unitEvaluator = (_unused_edge) => ({toWeight: 1, froWeight: 1});
    const makePagerankGraph = () =>
      new PagerankGraph(makeGraph(), unitEvaluator, 0.001);
    const stubRunner = (_unused_graph) =>
      new Promise((resolve) => resolve(makePagerankGraph()));

    it("passes the loaded graph to the pagerank runner", async () => {
      const runnerSpy = jest.fn();
      const command = makePagerankCommand(stubLoader, runnerSpy, jest.fn());
      await run(command, ["foo/bar"]);
      expect(runnerSpy).toHaveBeenCalledWith(makeGraph());
    });

    it("passes the resultant pagerankGraph to the saver", async () => {
      const saverSpy = jest.fn();
      const command = makePagerankCommand(stubLoader, stubRunner, saverSpy);
      await run(command, ["foo/bar"]);
      const expectedRepoId = makeRepoId("foo", "bar");
      expect(saverSpy).toHaveBeenCalledWith(
        expectedRepoId,
        makePagerankGraph()
      );
    });

    it("returns with exit code 0 and nothing printed to stdout/stderr", async () => {
      const command = makePagerankCommand(stubLoader, stubRunner, jest.fn());
      const outcome = await run(command, ["foo/bar"]);
      expect(outcome).toEqual({
        exitCode: 0,
        stdout: [],
        stderr: [],
      });
    });
  });
});
describe("savePagerankGraph", () => {
  it("saves the PagerankGraphJSON to the right filepath", async () => {
    // Smallest useful fixture: a single-node graph with a constant
    // edge evaluator.
    const singleNodeGraph = new Graph().addNode(NodeAddress.empty);
    const constantEvaluator = (_unused_edge) => ({toWeight: 1, froWeight: 2});
    const pagerankGraph = new PagerankGraph(singleNodeGraph, constantEvaluator);

    const scratchDir = tmp.dirSync().name;
    const repoId = makeRepoId("foo", "bar");
    await savePagerankGraph(scratchDir, repoId, pagerankGraph);

    const savedFile = path.join(
      scratchDir,
      "data",
      repoIdToString(repoId),
      "pagerankGraph.json"
    );
    const contents = fs.readFileSync(savedFile).toString();
    expect(JSON.parse(contents)).toEqual(pagerankGraph.toJSON());
  });
});
describe("runPagerank", () => {
it("computes pagerank with the given weights", async () => {
const nodeType: NodeType = {
name: "foo",
pluralName: "foos",
prefix: NodeAddress.fromParts(["src"]),
defaultWeight: 3,
description: "an example node type",
};
const edgeType: EdgeType = {
forwardName: "bars",
backwardName: "barred by",
defaultForwardWeight: 5,
defaultBackwardWeight: 3,
prefix: EdgeAddress.fromParts(["hom"]),
description: "an example edge type",
};
const exampleDeclaration = {
name: "example",
nodePrefix: NodeAddress.fromParts(["src"]),
edgePrefix: EdgeAddress.fromParts(["hom"]),
nodeTypes: [nodeType],
edgeTypes: [edgeType],
};
const exampleWeightedTypes = defaultWeightsForDeclaration(
exampleDeclaration
);
const fallbackWeightedTypes = defaultWeightsForDeclaration(
fallbackDeclaration
);
const weightedTypes = combineWeights([
exampleWeightedTypes,
fallbackWeightedTypes,
]);
const graph = advancedGraph().graph1();
const actualPagerankGraph = await runPagerank(weightedTypes, graph);
const expectedPagerankGraph = new PagerankGraph(
graph,
weightsToEdgeEvaluator(weightedTypes),
DEFAULT_SYNTHETIC_LOOP_WEIGHT
);
await expectedPagerankGraph.runPagerank({
convergenceThreshold: DEFAULT_CONVERGENCE_THRESHOLD,
maxIterations: DEFAULT_MAX_ITERATIONS,
});
expect(actualPagerankGraph.equals(expectedPagerankGraph)).toBe(true);
});
it("default pageRank is robust to nodes that are not owned by any plugin", async () => {
const graph = new Graph().addNode(NodeAddress.empty).addEdge({
address: EdgeAddress.empty,
src: NodeAddress.empty,
dst: NodeAddress.empty,
});
await defaultPagerank(graph);
});
});
it("default weights contain every node and edge type from adapters", () => {
  const weights = defaultWeights();
  for (const adapter of defaultAdapters()) {
    const {nodeTypes, edgeTypes} = adapter.declaration();
    // Every declared node type must be present with its default weight.
    for (const nodeType of nodeTypes) {
      const weighted = NullUtil.get(weights.nodes.get(nodeType.prefix));
      expect(weighted.weight).toEqual(nodeType.defaultWeight);
      expect(weighted.type).toEqual(nodeType);
    }
    // Likewise every declared edge type, in both directions.
    for (const edgeType of edgeTypes) {
      const weighted = NullUtil.get(weights.edges.get(edgeType.prefix));
      expect(weighted.forwardWeight).toEqual(edgeType.defaultForwardWeight);
      expect(weighted.backwardWeight).toEqual(edgeType.defaultBackwardWeight);
      expect(weighted.type).toEqual(edgeType);
    }
  }
});
it("defaultSaver saves to sourcecred directory", async () => {
  const dirname = tmp.dirSync().name;
  // Point SOURCECRED_DIRECTORY at a scratch directory for this test
  // only. The previous value is restored afterwards so that the
  // override does not leak into other tests run in the same process.
  const previousDirectory = process.env.SOURCECRED_DIRECTORY;
  process.env.SOURCECRED_DIRECTORY = dirname;
  try {
    const repoId = makeRepoId("foo", "bar");
    const prg = new PagerankGraph(
      new Graph().addNode(NodeAddress.empty),
      (_unused_edge) => ({toWeight: 1, froWeight: 2})
    );
    await defaultSaver(repoId, prg);
    const expectedPath = path.join(
      dirname,
      "data",
      "foo/bar",
      "pagerankGraph.json"
    );
    const blob = await fs.readFile(expectedPath);
    const actualJSON = JSON.parse(blob.toString());
    expect(actualJSON).toEqual(prg.toJSON());
  } finally {
    // Assigning `undefined` to a process.env key would store the string
    // "undefined", so an originally-unset variable has to be deleted.
    if (previousDirectory == null) {
      delete process.env.SOURCECRED_DIRECTORY;
    } else {
      process.env.SOURCECRED_DIRECTORY = previousDirectory;
    }
  }
});
});

View File

@ -9,6 +9,7 @@ import help from "./help";
import load from "./load"; import load from "./load";
import analyze from "./analyze"; import analyze from "./analyze";
import exportGraph from "./exportGraph"; import exportGraph from "./exportGraph";
import pagerank from "./pagerank";
const sourcecred: Command = async (args, std) => { const sourcecred: Command = async (args, std) => {
if (args.length === 0) { if (args.length === 0) {
@ -28,6 +29,8 @@ const sourcecred: Command = async (args, std) => {
return analyze(args.slice(1), std); return analyze(args.slice(1), std);
case "export-graph": case "export-graph":
return exportGraph(args.slice(1), std); return exportGraph(args.slice(1), std);
case "pagerank":
return pagerank(args.slice(1), std);
default: default:
std.err("fatal: unknown command: " + JSON.stringify(args[0])); std.err("fatal: unknown command: " + JSON.stringify(args[0]));
std.err("fatal: run 'sourcecred help' for commands and usage"); std.err("fatal: run 'sourcecred help' for commands and usage");

View File

@ -15,6 +15,7 @@ jest.mock("./help", () => mockCommand("help"));
jest.mock("./load", () => mockCommand("load")); jest.mock("./load", () => mockCommand("load"));
jest.mock("./analyze", () => mockCommand("analyze")); jest.mock("./analyze", () => mockCommand("analyze"));
jest.mock("./exportGraph", () => mockCommand("export-graph")); jest.mock("./exportGraph", () => mockCommand("export-graph"));
jest.mock("./pagerank", () => mockCommand("pagerank"));
describe("cli/sourcecred", () => { describe("cli/sourcecred", () => {
it("fails with usage when invoked with no arguments", async () => { it("fails with usage when invoked with no arguments", async () => {
@ -75,6 +76,14 @@ describe("cli/sourcecred", () => {
}); });
}); });
it("responds to 'pagerank'", async () => {
expect(await run(sourcecred, ["pagerank", "foo/bar", "foo/baz"])).toEqual({
exitCode: 2,
stdout: ['out(pagerank): ["foo/bar","foo/baz"]'],
stderr: ["err(pagerank)"],
});
});
it("fails given an unknown command", async () => { it("fails given an unknown command", async () => {
expect(await run(sourcecred, ["wat"])).toEqual({ expect(await run(sourcecred, ["wat"])).toEqual({
exitCode: 1, exitCode: 1,