cli: implement `load` (#743)

Summary:
This ports the OClif version of `sourcecred load` to the sane CLI
system. The functionality is similar, but the interface has been
changed a bit (mostly simplifications):

  - The `SOURCECRED_GITHUB_TOKEN` can only be set by an environment
    variable, not by a command-line argument. This is standard practice
    because it is more secure: (a) other users on the same system can
    see the full command line arguments, but not the environment
    variables, and (b) it’s easier to accidentally leak a command line
    (e.g., in CI) than a full environment.

  - The `SOURCECRED_DIRECTORY` can only be set by an environment
    variable, not by a command-line argument. This is mostly just to
    simplify the interface, and also because we don’t really have a good
    name for the argument: we had previously used `-d`, which is
    unclear, while `--directory` is vague and `--sourcecred-directory`
    is redundant. This is an easy way out, but we can put the flag for
    this back in if it becomes a problem.

  - The `--max-old-space-size` argument has been removed in favor of a
    fixed value. It’s unlikely that users should need to change it.
    If we’re blowing an 8GB heap, we should try to not do that instead
    of increasing the heap.

  - Loading zero repositories, but specifying an output directory, is
    now valid. This is the right thing to do, but OClif got in our way
    in the previous implementation.

Test Plan:
Unit tests added, with full coverage; run `yarn unit`.

To try it out, run `yarn backend`, then `node bin/cli.js load --help` to
get started.

I also manually tested that the following invocations work (i.e., they
complete successfully, and `yarn start` shows good data):

  - `load sourcecred/sourcecred`
  - `load sourcecred/example-git{,hub} --output sourcecred/examples`

These work even when invoked from a different directory.

wchargin-branch: cli-load
This commit is contained in:
William Chargin 2018-09-02 16:07:46 -07:00 committed by GitHub
parent d685ebbdd4
commit 17172c2d96
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 725 additions and 2 deletions

View File

@ -1,8 +1,8 @@
// @flow
// The repoRegistry is written by the CLI load command
// (src/oclif/commands/load.js) and is read by the RepositorySelect component
// (src/app/credExplorer/RepositorySelect.js)
// (src/oclif/commands/load.js; src/cli/load.js) and is read by the
// RepositorySelect component (src/app/credExplorer/RepositorySelect.js)
import deepEqual from "lodash.isequal";
import {toCompat, fromCompat, type Compatible} from "../../util/compat";
import type {Repo} from "../../core/repo";

View File

@ -4,6 +4,8 @@
import type {Command} from "./command";
import dedent from "../util/dedent";
import {help as loadHelp} from "./load";
const help: Command = async (args, std) => {
if (args.length === 0) {
usage(std.out);
@ -12,6 +14,7 @@ const help: Command = async (args, std) => {
const command = args[0];
const subHelps: {[string]: Command} = {
help: metaHelp,
load: loadHelp,
};
if (subHelps[command] !== undefined) {
return subHelps[command](args.slice(1), std);
@ -28,6 +31,7 @@ function usage(print: (string) => void): void {
sourcecred [--version] [--help]
Commands:
load load repository data into SourceCred
help show this help message
Use 'sourcecred help COMMAND' for help about an individual command.

View File

@ -25,6 +25,16 @@ describe("cli/help", () => {
});
});
it("prints help about 'sourcecred load'", async () => {
expect(await run(help, ["load"])).toEqual({
exitCode: 0,
stdout: expect.arrayContaining([
expect.stringMatching(/^usage: sourcecred load/),
]),
stderr: [],
});
});
it("fails when given an unknown command", async () => {
expect(await run(help, ["wat"])).toEqual({
exitCode: 1,

232
src/cli/load.js Normal file
View File

@ -0,0 +1,232 @@
// @flow
// Implementation of `sourcecred load`.
import fs from "fs";
import stringify from "json-stable-stringify";
import mkdirp from "mkdirp";
import path from "path";
import * as RepoRegistry from "../app/credExplorer/repoRegistry";
import {repoToString, stringToRepo, type Repo} from "../core/repo";
import dedent from "../util/dedent";
import type {Command} from "./command";
import * as Common from "./common";
import {loadGithubData} from "../plugins/github/loadGithubData";
import {loadGitData} from "../plugins/git/loadGitData";
const execDependencyGraph = require("../tools/execDependencyGraph").default;
/**
 * Emit the usage text for `sourcecred load` by passing it to `print`
 * in a single call.
 *
 * The text documents the positional REPO arguments, the `--output`,
 * `--plugin`, and `--help` flags, and the `SOURCECRED_GITHUB_TOKEN` and
 * `SOURCECRED_DIRECTORY` environment variables. The result of
 * `Common.defaultSourcecredDirectory()` is interpolated into the text,
 * and trailing whitespace is removed with `trimRight()` before printing.
 *
 * NOTE(review): `dedent` is a project helper not shown here; presumably
 * it strips the template's common leading indentation. Confirm before
 * changing the layout of the literal.
 *
 * @param print Sink for the text; callers in this file pass `std.out`
 *     or `std.err`.
 */
function usage(print: (string) => void): void {
print(
dedent`\
usage: sourcecred load [REPO...] [--output REPO]
[--plugin PLUGIN]
[--help]
Load a repository's data into SourceCred.
Each REPO refers to a GitHub repository in the form OWNER/NAME: for
example, torvalds/linux.
Arguments:
REPO...
Repositories for which to load data.
--output REPO
Store the data under the name of this repository. When
loading multiple repositories, this can be the name of an
aggregate repository. For instance, if loading data for
repositories 'foo/bar' and 'foo/baz', the output name might
be 'foo/combined'.
If only one repository is given, the output defaults to that
repository. Otherwise, an output must be specified.
--plugin PLUGIN
Plugin for which to load data. Valid options are 'git' and
'github'. If not specified, data for all plugins will be
loaded.
--help
Show this help message and exit, as 'sourcecred help load'.
Environment variables:
SOURCECRED_GITHUB_TOKEN
API token for GitHub. This should be a 40-character hex
string. Required if using the GitHub plugin; ignored
otherwise.
To generate a token, create a "Personal access token" at
<https://github.com/settings/tokens>. When loading data for
public repositories, no special permissions are required.
For private repositories, the 'repo' scope is required.
SOURCECRED_DIRECTORY
Directory owned by SourceCred, in which data, caches,
registries, etc. are stored. Optional: defaults to a
directory 'sourcecred' under your OS's temporary directory;
namely:
${Common.defaultSourcecredDirectory()}
`.trimRight()
);
}
function die(std, message) {
std.err("fatal: " + message);
std.err("fatal: run 'sourcecred help load' for help");
return 1;
}
const load: Command = async (args, std) => {
const repos = [];
let explicitOutput: Repo | null = null;
let plugin: Common.PluginName | null = null;
for (let i = 0; i < args.length; i++) {
switch (args[i]) {
case "--help": {
usage(std.out);
return 0;
}
case "--output": {
if (explicitOutput != null)
return die(std, "'--output' given multiple times");
if (++i >= args.length)
return die(std, "'--output' given without value");
explicitOutput = stringToRepo(args[i]);
break;
}
case "--plugin": {
if (plugin != null) return die(std, "'--plugin' given multiple times");
if (++i >= args.length)
return die(std, "'--plugin' given without value");
const arg = args[i];
if (arg !== "git" && arg !== "github")
return die(std, "unknown plugin: " + JSON.stringify(arg));
plugin = arg;
break;
}
default: {
// Should be a repository.
repos.push(stringToRepo(args[i]));
break;
}
}
}
let output: Repo;
if (explicitOutput != null) {
output = explicitOutput;
} else if (repos.length === 1) {
output = repos[0];
} else {
return die(std, "output repository not specified");
}
if (plugin == null) {
return loadDefaultPlugins({std, output, repos});
} else {
return loadPlugin({std, output, repos, plugin});
}
};
/**
 * Load data for every plugin listed by `Common.defaultPlugins()`.
 *
 * For each plugin, a task is built that re-invokes this CLI
 * (`process.execPath` running `process.argv[1]`) as
 * `load REPO... --output OUTPUT --plugin PLUGIN` with a fixed
 * `--max_old_space_size=8192`. The tasks have no dependencies on each
 * other and are handed to `execDependencyGraph`. If it reports success,
 * `output` is added to the repository registry.
 *
 * Fails up front (exit code 1) when no GitHub token is configured.
 * Resolves to 0 on success and 1 otherwise.
 */
const loadDefaultPlugins = async ({std, output, repos}) => {
  // TODO(#638): This check should be abstracted so that plugins can
  // specify their argument dependencies and get nicely formatted
  // errors.
  if (Common.githubToken() == null) {
    return die(std, "no GitHub token specified");
  }

  // Describe the subprocess that loads a single plugin's data.
  const taskForPlugin = (pluginName) => {
    const repoArgs = repos.map((repo) => repoToString(repo));
    const cmd = [
      process.execPath,
      "--max_old_space_size=8192",
      process.argv[1],
      "load",
      ...repoArgs,
      "--output",
      repoToString(output),
      "--plugin",
      pluginName,
    ];
    return {id: `load-${pluginName}`, cmd, deps: []};
  };

  const tasks = Common.defaultPlugins().map(taskForPlugin);
  const result = await execDependencyGraph(tasks, {taskPassLabel: "DONE"});
  if (!result.success) {
    return 1;
  }
  addToRepoRegistry(output);
  return 0;
};
const loadPlugin = async ({std, output, repos, plugin}) => {
function scopedDirectory(key) {
const directory = path.join(
Common.sourcecredDirectory(),
key,
repoToString(output),
plugin
);
mkdirp.sync(directory);
return directory;
}
const outputDirectory = scopedDirectory("data");
const cacheDirectory = scopedDirectory("cache");
switch (plugin) {
case "github": {
const token = Common.githubToken();
if (token == null) {
// TODO(#638): This check should be abstracted so that plugins
// can specify their argument dependencies and get nicely
// formatted errors.
return die(std, "no GitHub token specified");
}
await loadGithubData({token, repos, outputDirectory, cacheDirectory});
return 0;
}
case "git":
await loadGitData({repos, outputDirectory, cacheDirectory});
return 0;
// Unlike the previous check, which was validating user input and
// was reachable, this really should not occur.
// istanbul ignore next
default:
return die(std, "unknown plugin: " + JSON.stringify((plugin: empty)));
}
};
/**
 * Record `repo` in the repository registry file stored directly under
 * the SourceCred directory.
 *
 * An existing registry file is read and parsed; otherwise an empty
 * registry is used. The registry with `repo` added is then written
 * back, serialized with `json-stable-stringify`.
 */
function addToRepoRegistry(repo) {
  // TODO: Make this function transactional before loading repositories in
  // parallel.
  const registryPath = path.join(
    Common.sourcecredDirectory(),
    RepoRegistry.REPO_REGISTRY_FILE
  );
  const readExistingRegistry = () => {
    const raw = fs.readFileSync(registryPath).toString();
    return RepoRegistry.fromJSON(JSON.parse(raw));
  };
  const previous = fs.existsSync(registryPath)
    ? readExistingRegistry()
    : RepoRegistry.emptyRegistry();
  const updated = RepoRegistry.addRepo(repo, previous);
  fs.writeFileSync(registryPath, stringify(RepoRegistry.toJSON(updated)));
}
export const help: Command = async (args, std) => {
if (args.length === 0) {
usage(std.out);
return 0;
} else {
usage(std.err);
return 1;
}
};
export default load;

465
src/cli/load.test.js Normal file
View File

@ -0,0 +1,465 @@
// @flow
import fs from "fs";
import path from "path";
import tmp from "tmp";
import {run} from "./testUtil";
import load, {help} from "./load";
import * as RepoRegistry from "../app/credExplorer/repoRegistry";
import {stringToRepo} from "../core/repo";
// Replace the three modules that `./load` delegates to with Jest mock
// functions, so that each test can decide what they resolve or reject
// with and can inspect how they were called.
jest.mock("../tools/execDependencyGraph", () => ({
default: jest.fn(),
}));
jest.mock("../plugins/github/loadGithubData", () => ({
loadGithubData: jest.fn(),
}));
jest.mock("../plugins/git/loadGitData", () => ({
loadGitData: jest.fn(),
}));
// Flow type of a Jest mock function: whatever `jest.fn()` returns.
type JestMockFn = $Call<typeof jest.fn>;
// Handles to the mocked exports. Each `require` result is cast through
// `any` so that it can be annotated as a `JestMockFn`.
const execDependencyGraph: JestMockFn = (require("../tools/execDependencyGraph")
.default: any);
const loadGithubData: JestMockFn = (require("../plugins/github/loadGithubData")
.loadGithubData: any);
const loadGitData: JestMockFn = (require("../plugins/git/loadGitData")
.loadGitData: any);
describe("cli/load", () => {
// Start every test with clean mocks and a fresh environment. Tests that
// care about the directory's actual path should call
// `newSourcecredDirectory` themselves and use its return value; the
// call here only guarantees that the environment is populated.
beforeEach(() => {
  jest.clearAllMocks();
  newSourcecredDirectory();
});

// Forty characters: the ten digits, four times over.
const fakeGithubToken = "0123456789".repeat(4);

/**
 * Create a new temporary directory, point `SOURCECRED_DIRECTORY` at it,
 * reset `SOURCECRED_GITHUB_TOKEN` to the fake token, and return the
 * directory's path.
 */
function newSourcecredDirectory() {
  const {name: freshDirectory} = tmp.dirSync();
  process.env.SOURCECRED_GITHUB_TOKEN = fakeGithubToken;
  process.env.SOURCECRED_DIRECTORY = freshDirectory;
  return freshDirectory;
}
// Tests for the named `help` export of `./load`: usage goes to stdout
// with exit code 0 when there are no arguments, and to stderr with exit
// code 1 when any argument is supplied.
describe("'help' command", () => {
it("prints usage when given no arguments", async () => {
expect(await run(help, [])).toEqual({
exitCode: 0,
// Only the first usage line is pinned; other lines may change freely.
stdout: expect.arrayContaining([
expect.stringMatching(/^usage: sourcecred load/),
]),
stderr: [],
});
});
it("fails when given arguments", async () => {
expect(await run(help, ["foo/bar"])).toEqual({
exitCode: 1,
stdout: [],
stderr: expect.arrayContaining([
expect.stringMatching(/^usage: sourcecred load/),
]),
});
});
});
describe("'load' command", () => {
// `load --help` prints the usage text to stdout and exits successfully.
it("prints usage with '--help'", async () => {
expect(await run(load, ["--help"])).toEqual({
exitCode: 0,
stdout: expect.arrayContaining([
expect.stringMatching(/^usage: sourcecred load/),
]),
stderr: [],
});
});
// Argument-validation failures around the output repository. Each case
// expects exit code 1, nothing on stdout, and exactly two "fatal:"
// lines on stderr.
describe("for multiple repositories", () => {
it("fails when no output is specified for two repos", async () => {
expect(
await run(load, ["foo/bar", "foo/baz", "--plugin", "git"])
).toEqual({
exitCode: 1,
stdout: [],
stderr: [
"fatal: output repository not specified",
"fatal: run 'sourcecred help load' for help",
],
});
});
// The output can only be inferred when exactly one repo is given, so
// zero repos without `--output` is also an error.
it("fails when no output is specified for zero repos", async () => {
expect(await run(load, ["--plugin", "git"])).toEqual({
exitCode: 1,
stdout: [],
stderr: [
"fatal: output repository not specified",
"fatal: run 'sourcecred help load' for help",
],
});
});
it("fails when '--output' is given without a value", async () => {
expect(await run(load, ["foo/bar", "--output"])).toEqual({
exitCode: 1,
stdout: [],
stderr: [
"fatal: '--output' given without value",
"fatal: run 'sourcecred help load' for help",
],
});
});
// Repeating `--output` is rejected even when both values agree.
it("fails when the same '--output' is given multiple times", async () => {
expect(
await run(load, [
"foo/bar",
"--output",
"foo/baz",
"--output",
"foo/baz",
])
).toEqual({
exitCode: 1,
stdout: [],
stderr: [
"fatal: '--output' given multiple times",
"fatal: run 'sourcecred help load' for help",
],
});
});
it("fails when multiple '--output's are given", async () => {
expect(
await run(load, [
"foo/bar",
"--output",
"foo/baz",
"--output",
"foo/quux",
])
).toEqual({
exitCode: 1,
stdout: [],
stderr: [
"fatal: '--output' given multiple times",
"fatal: run 'sourcecred help load' for help",
],
});
});
});
// Tests for `load ... --plugin PLUGIN`: validation of the `--plugin`
// flag, then the Git and GitHub code paths. On these paths the plugin's
// loader is expected to be called directly and `execDependencyGraph`
// not at all.
describe("when loading single-plugin data", () => {
it("fails for an unknown plugin", async () => {
expect(await run(load, ["foo/bar", "--plugin", "wat"])).toEqual({
exitCode: 1,
stdout: [],
stderr: [
'fatal: unknown plugin: "wat"',
"fatal: run 'sourcecred help load' for help",
],
});
});
it("fails when '--plugin' is given without a value", async () => {
expect(await run(load, ["foo/bar", "--plugin"])).toEqual({
exitCode: 1,
stdout: [],
stderr: [
"fatal: '--plugin' given without value",
"fatal: run 'sourcecred help load' for help",
],
});
});
// Repeating `--plugin` is rejected even when both values agree.
it("fails when the same plugin is specified multiple times", async () => {
expect(
await run(load, ["foo/bar", "--plugin", "git", "--plugin", "git"])
).toEqual({
exitCode: 1,
stdout: [],
stderr: [
"fatal: '--plugin' given multiple times",
"fatal: run 'sourcecred help load' for help",
],
});
});
it("fails when multiple plugins are specified", async () => {
expect(
await run(load, ["foo/bar", "--plugin", "git", "--plugin", "github"])
).toEqual({
exitCode: 1,
stdout: [],
stderr: [
"fatal: '--plugin' given multiple times",
"fatal: run 'sourcecred help load' for help",
],
});
});
describe("for the Git plugin", () => {
it("correctly loads data", async () => {
const sourcecredDirectory = newSourcecredDirectory();
loadGitData.mockResolvedValueOnce(undefined);
expect(await run(load, ["foo/bar", "--plugin", "git"])).toEqual({
exitCode: 0,
stdout: [],
stderr: [],
});
expect(execDependencyGraph).not.toHaveBeenCalled();
expect(loadGitData).toHaveBeenCalledTimes(1);
// Directories are scoped as <dir>/<data|cache>/<owner>/<name>/<plugin>.
expect(loadGitData).toHaveBeenCalledWith({
repos: [stringToRepo("foo/bar")],
outputDirectory: path.join(
sourcecredDirectory,
"data",
"foo",
"bar",
"git"
),
cacheDirectory: path.join(
sourcecredDirectory,
"cache",
"foo",
"bar",
"git"
),
});
});
// NOTE(review): the expected stderr suggests that `run` (from
// ./testUtil, not shown here) reports a rejection as its JSON-encoded
// value with exit code 1 -- confirm against testUtil.
it("fails if `loadGitData` rejects", async () => {
loadGitData.mockRejectedValueOnce("please install Git");
expect(await run(load, ["foo/bar", "--plugin", "git"])).toEqual({
exitCode: 1,
stdout: [],
stderr: ['"please install Git"'],
});
});
});
// This case exercises the Git plugin with an aggregate `--output`; the
// data and cache directories are keyed by the output repo, not by the
// individual input repos.
it("succeeds for multiple repositories", async () => {
const sourcecredDirectory = newSourcecredDirectory();
loadGitData.mockResolvedValueOnce(undefined);
expect(
await run(load, [
"foo/bar",
"foo/baz",
"--output",
"foo/combined",
"--plugin",
"git",
])
).toEqual({
exitCode: 0,
stdout: [],
stderr: [],
});
expect(execDependencyGraph).not.toHaveBeenCalled();
expect(loadGitData).toHaveBeenCalledTimes(1);
expect(loadGitData).toHaveBeenCalledWith({
repos: [stringToRepo("foo/bar"), stringToRepo("foo/baz")],
outputDirectory: path.join(
sourcecredDirectory,
"data",
"foo",
"combined",
"git"
),
cacheDirectory: path.join(
sourcecredDirectory,
"cache",
"foo",
"combined",
"git"
),
});
});
describe("for the GitHub plugin", () => {
it("correctly loads data", async () => {
const sourcecredDirectory = newSourcecredDirectory();
loadGithubData.mockResolvedValueOnce(undefined);
expect(await run(load, ["foo/bar", "--plugin", "github"])).toEqual({
exitCode: 0,
stdout: [],
stderr: [],
});
expect(execDependencyGraph).not.toHaveBeenCalled();
expect(loadGithubData).toHaveBeenCalledTimes(1);
// The token comes from the SOURCECRED_GITHUB_TOKEN environment variable,
// which `newSourcecredDirectory` sets to `fakeGithubToken`.
expect(loadGithubData).toHaveBeenCalledWith({
token: fakeGithubToken,
repos: [stringToRepo("foo/bar")],
outputDirectory: path.join(
sourcecredDirectory,
"data",
"foo",
"bar",
"github"
),
cacheDirectory: path.join(
sourcecredDirectory,
"cache",
"foo",
"bar",
"github"
),
});
});
it("fails if a token is not provided", async () => {
// The `beforeEach` hook sets the token again before the next test.
delete process.env.SOURCECRED_GITHUB_TOKEN;
expect(await run(load, ["foo/bar", "--plugin", "github"])).toEqual({
exitCode: 1,
stdout: [],
stderr: [
"fatal: no GitHub token specified",
"fatal: run 'sourcecred help load' for help",
],
});
});
it("fails if `loadGithubData` rejects", async () => {
loadGithubData.mockRejectedValueOnce("GitHub is down");
expect(await run(load, ["foo/bar", "--plugin", "github"])).toEqual({
exitCode: 1,
stdout: [],
stderr: ['"GitHub is down"'],
});
});
});
});
// Tests for `load` without `--plugin`: the command is expected to build
// one subprocess task per plugin, hand the tasks to the (mocked)
// `execDependencyGraph`, and update the repository registry on success.
describe("when loading data for all plugins", () => {
it("fails if a GitHub token is not provided", async () => {
// The `beforeEach` hook sets the token again before the next test.
delete process.env.SOURCECRED_GITHUB_TOKEN;
expect(await run(load, ["foo/bar"])).toEqual({
exitCode: 1,
stdout: [],
stderr: [
"fatal: no GitHub token specified",
"fatal: run 'sourcecred help load' for help",
],
});
});
it("invokes `execDependencyGraph` with a correct set of tasks", async () => {
execDependencyGraph.mockResolvedValueOnce({success: true});
expect(
await run(load, ["foo/bar", "foo/baz", "--output", "foo/combined"])
).toEqual({
exitCode: 0,
stdout: [],
stderr: [],
});
expect(execDependencyGraph).toHaveBeenCalledTimes(1);
// First argument of the first call: the list of tasks.
const tasks = execDependencyGraph.mock.calls[0][0];
expect(tasks).toHaveLength(["git", "github"].length);
// The negative lookahead keeps a "github" task ID from also satisfying
// the "git" matcher.
expect(tasks.map((task) => task.id)).toEqual(
expect.arrayContaining([
expect.stringMatching(/git(?!hub)/),
expect.stringMatching(/github/),
])
);
// Every task re-invokes the CLI with the same repos and output, and
// differs only in the trailing `--plugin` value.
for (const task of tasks) {
expect(task.cmd).toEqual([
expect.stringMatching(/\bnode\b/),
expect.stringMatching(/--max_old_space_size=/),
process.argv[1],
"load",
"foo/bar",
"foo/baz",
"--output",
"foo/combined",
"--plugin",
expect.stringMatching(/^(?:git|github)$/),
]);
}
});
it("properly infers the output when loading a single repository", async () => {
execDependencyGraph.mockResolvedValueOnce({success: true});
expect(await run(load, ["foo/bar"])).toEqual({
exitCode: 0,
stdout: [],
stderr: [],
});
expect(execDependencyGraph).toHaveBeenCalledTimes(1);
const tasks = execDependencyGraph.mock.calls[0][0];
// The inferred output is passed to the subprocess as an explicit
// `--output`.
for (const task of tasks) {
expect(task.cmd).toEqual([
expect.stringMatching(/\bnode\b/),
expect.stringMatching(/--max_old_space_size=/),
process.argv[1],
"load",
"foo/bar",
"--output",
"foo/bar",
"--plugin",
expect.stringMatching(/^(?:git|github)$/),
]);
}
});
// A resolved `{success: false}` yields exit code 1 with no output.
it("fails if `execDependencyGraph` returns failure", async () => {
execDependencyGraph.mockResolvedValueOnce({success: false});
expect(
await run(load, ["foo/bar", "foo/baz", "--output", "foo/combined"])
).toEqual({
exitCode: 1,
stdout: [],
stderr: [],
});
});
// NOTE(review): the expected stderr suggests that `run` (from
// ./testUtil, not shown here) reports a rejection as its JSON-encoded
// value with exit code 1 -- confirm against testUtil.
it("fails if `execDependencyGraph` rejects", async () => {
execDependencyGraph.mockRejectedValueOnce({success: "definitely not"});
expect(
await run(load, ["foo/bar", "foo/baz", "--output", "foo/combined"])
).toEqual({
exitCode: 1,
stdout: [],
stderr: ['{"success":"definitely not"}'],
});
});
it("writes a new repository registry if one does not exist", async () => {
const sourcecredDirectory = newSourcecredDirectory();
execDependencyGraph.mockResolvedValueOnce({success: true});
await run(load, ["foo/bar", "foo/baz", "--output", "foo/combined"]);
const blob = fs
.readFileSync(
path.join(sourcecredDirectory, RepoRegistry.REPO_REGISTRY_FILE)
)
.toString();
const registry = RepoRegistry.fromJSON(JSON.parse(blob));
// Only the output repo is registered, not the individual input repos.
expect(registry).toEqual([stringToRepo("foo/combined")]);
});
it("appends to an existing registry", async () => {
const sourcecredDirectory = newSourcecredDirectory();
// Seed the registry file with two repos before running `load`.
fs.writeFileSync(
path.join(sourcecredDirectory, RepoRegistry.REPO_REGISTRY_FILE),
JSON.stringify(
RepoRegistry.toJSON([
stringToRepo("previous/one"),
stringToRepo("previous/two"),
])
)
);
execDependencyGraph.mockResolvedValueOnce({success: true});
await run(load, ["foo/bar", "foo/baz", "--output", "foo/combined"]);
const blob = fs
.readFileSync(
path.join(sourcecredDirectory, RepoRegistry.REPO_REGISTRY_FILE)
)
.toString();
const registry = RepoRegistry.fromJSON(JSON.parse(blob));
// Existing entries keep their order; the new output repo comes last.
expect(registry).toEqual([
stringToRepo("previous/one"),
stringToRepo("previous/two"),
stringToRepo("foo/combined"),
]);
});
});
});
});

View File

@ -6,6 +6,7 @@ import type {Command} from "./command";
import {VERSION_SHORT} from "../app/version";
import help from "./help";
import load from "./load";
const sourcecred: Command = async (args, std) => {
if (args.length === 0) {
@ -19,6 +20,8 @@ const sourcecred: Command = async (args, std) => {
case "--help":
case "help":
return help(args.slice(1), std);
case "load":
return load(args.slice(1), std);
default:
std.err("fatal: unknown command: " + JSON.stringify(args[0]));
std.err("fatal: run 'sourcecred help' for commands and usage");

View File

@ -12,6 +12,7 @@ function mockCommand(name) {
}
jest.mock("./help", () => mockCommand("help"));
jest.mock("./load", () => mockCommand("load"));
describe("cli/sourcecred", () => {
it("fails with usage when invoked with no arguments", async () => {
@ -46,6 +47,14 @@ describe("cli/sourcecred", () => {
});
});
it("responds to 'load'", async () => {
expect(await run(sourcecred, ["load", "foo/bar", "foo/baz"])).toEqual({
exitCode: 2,
stdout: ['out(load): ["foo/bar","foo/baz"]'],
stderr: ["err(load)"],
});
});
it("fails given an unknown command", async () => {
expect(await run(sourcecred, ["wat"])).toEqual({
exitCode: 1,