Combine the merge and score commands (#1859)

As discussed [in Discord]:

At present, cli2 is organized around the following commands:

- init: help setting up a new SourceCred instance (not yet implemented)
- load [PLUGINS...]: load data from all active plugins (default) or
specific named plugins
- graph: regenerate graph for every plugin (will likely give this
[PLUGINS...] args in the future)
- merge: merge the generated graphs together
- score: compute scores over all the graphs

I'm going to change this to fold the merge command into the score
command. This is inspired by realizing that setting the weights.json
should get incorporated in the merge command from an implementation
perspective, but in the score command conceptually.

Thus, having merge and score as separate commands is actually an
anti-affordance, it allows the user to get into a confusing state where
the graph in their output directory is in a different state from the
scores.

(also, merge takes about 2s on sc's cred instance, so it isn't a huge
performance penalty on the flow of "recompute scores with new
parameters")

cc @wchargin

Also, I wonder whether we should persist the weightedGraph to disk at
all, seeing as it is already included in the credResult.json.

Storing the graph on its own is inefficient from a space perspective,
although it can be nice to get an eyeball sense of how big the
graph is compared to the associated cred data. For now, I'm not going to
write the weightedGraph to disk, simply because it's more convenient to
implement.

If there's a need, we can add it back later.

[in Discord]: https://discordapp.com/channels/453243919774253079/454007907663740939/721603543278157905

Test plan: manually tested it on a local instance
This commit is contained in:
Dandelion Mané 2020-06-15 12:40:45 -07:00 committed by GitHub
parent e49823921d
commit 53d3bd6766
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 14 additions and 61 deletions

View File

@ -1,53 +0,0 @@
// @flow
import fs from "fs-extra";
import stringify from "json-stable-stringify";
import {join as pathJoin} from "path";
import {LoggingTaskReporter} from "../util/taskReporter";
import type {Command} from "./command";
import {makePluginDir, loadInstanceConfig} from "./common";
import {
toJSON as weightedGraphToJSON,
fromJSON as weightedGraphFromJSON,
type WeightedGraph,
merge,
} from "../core/weightedGraph";
function die(std, message) {
std.err("fatal: " + message);
return 1;
}
const mergeCommand: Command = async (args, std) => {
if (args.length !== 0) {
return die(std, "usage: sourcecred merge");
}
const taskReporter = new LoggingTaskReporter();
taskReporter.start("merge");
const baseDir = process.cwd();
const config = await loadInstanceConfig(baseDir);
const graphOutputPrefix = ["output", "graphs"];
async function loadGraph(pluginName): Promise<WeightedGraph> {
const outputDir = makePluginDir(baseDir, graphOutputPrefix, pluginName);
const outputPath = pathJoin(outputDir, "graph.json");
const graphJSON = JSON.parse(await fs.readFile(outputPath));
return weightedGraphFromJSON(graphJSON);
}
const pluginNames = Array.from(config.bundledPlugins.keys());
const graphs = await Promise.all(pluginNames.map(loadGraph));
// TODO: Support identity merging.
// TODO: Support weight overrides.
const combinedGraph = merge(graphs);
const outputPath = pathJoin(baseDir, "output", "graph.json");
const serializedGraph = stringify(weightedGraphToJSON(combinedGraph));
await fs.writeFile(outputPath, serializedGraph);
taskReporter.finish("merge");
return 0;
};
export default mergeCommand;

View File

@ -5,9 +5,10 @@ import stringify from "json-stable-stringify";
import {join as pathJoin} from "path";
import type {Command} from "./command";
import {loadInstanceConfig} from "./common";
import {makePluginDir, loadInstanceConfig} from "./common";
import {fromJSON as weightedGraphFromJSON} from "../core/weightedGraph";
import {defaultParams} from "../analysis/timeline/params";
import {type WeightedGraph, merge} from "../core/weightedGraph";
import {LoggingTaskReporter} from "../util/taskReporter";
import {
compute,
@ -34,9 +35,17 @@ const scoreCommand: Command = async (args, std) => {
const baseDir = process.cwd();
const config = await loadInstanceConfig(baseDir);
const graphFilePath = pathJoin(baseDir, "output", "graph.json");
const graphJSON = JSON.parse(await fs.readFile(graphFilePath));
const graph = weightedGraphFromJSON(graphJSON);
const graphOutputPrefix = ["output", "graphs"];
async function loadGraph(pluginName): Promise<WeightedGraph> {
const outputDir = makePluginDir(baseDir, graphOutputPrefix, pluginName);
const outputPath = pathJoin(outputDir, "graph.json");
const graphJSON = JSON.parse(await fs.readFile(outputPath));
return weightedGraphFromJSON(graphJSON);
}
const pluginNames = Array.from(config.bundledPlugins.keys());
const graphs = await Promise.all(pluginNames.map(loadGraph));
const combinedGraph = merge(graphs);
const plugins = Array.from(config.bundledPlugins.values());
const declarations = plugins.map((x) => x.declaration());
@ -44,7 +53,7 @@ const scoreCommand: Command = async (args, std) => {
// TODO: Support loading params from config.
const params = defaultParams();
const credResult = await compute(graph, params, declarations);
const credResult = await compute(combinedGraph, params, declarations);
const compressed = compressByThreshold(credResult, CRED_THRESHOLD);
const credJSON = stringify(credResultToJSON(compressed));
const outputPath = pathJoin(baseDir, "output", "credResult.json");

View File

@ -4,7 +4,6 @@ import type {Command} from "./command";
import load from "./load";
import graph from "./graph";
import merge from "./merge";
import score from "./score";
const sourcecred: Command = async (args, std) => {
@ -17,8 +16,6 @@ const sourcecred: Command = async (args, std) => {
return load(args.slice(1), std);
case "graph":
return graph(args.slice(1), std);
case "merge":
return merge(args.slice(1), std);
case "score":
return score(args.slice(1), std);
default: