Allow combining repositories at load time (#711)

Summary:
As a first pass toward support for analyzing whole organizations, we
allow loading multiple repositories with `sourcecred load`, combining
them into a single relational view and a single Git graph at load time.

Test Plan:
Run

```
node bin/sourcecred.js \
    load \
    sourcecred/example-git \
    sourcecred/example-github \
    sourcecred/sourcecred \
    --output sourcecred/examples \
    ;
```

and select `sourcecred/examples` from the web view. Filter “Repository”
nodes, and note that there are three.

Note that loading a single repository without `--output` still works,
that loading a single repository with `--output` still works (respecting
the alias name), and that loading any number of repositories other than
one without `--output` yields an appropriate error message.

Note that `yarn sharness-full` still works.

wchargin-branch: load-combined
This commit is contained in:
William Chargin 2018-08-29 14:52:26 -07:00 committed by GitHub
parent 2001d3a699
commit 761b5a0875
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 73 additions and 19 deletions

View File

@ -1,6 +1,7 @@
# Changelog
## [Unreleased]
- Support combining multiple repositories into a single graph (#711)
- Normalize scores so that 1000 cred is split amongst users (#709)
- Stop persisting weights in local store (#706)
- Execute GraphQL queries with exponential backoff (#699)

View File

@ -57,6 +57,16 @@ replacing the big string of zeros with your actual token.
[ipfs/js-ipfs]: https://github.com/ipfs/js-ipfs
You can also combine data from multiple repositories into a single graph.
To do so, pass multiple repositories to the `load` command, and specify an “output name” for the repository.
For instance, the invocation
```
node bin/sourcecred.js load ipfs/js-ipfs ipfs/go-ipfs --output ipfs/meta-ipfs
```
will create a graph called `ipfs/meta-ipfs` in the cred explorer, containing the combined contents of the js-ipfs and go-ipfs repositories.
## Early Adopters
We're looking for projects who want to be early adopters of SourceCred!

View File

@ -30,11 +30,14 @@ const execDependencyGraph = require("../../tools/execDependencyGraph").default;
export default class PluginGraphCommand extends Command {
static description = "load data required for SourceCred";
static strict = false;
static args = [
{
name: "repo",
name: "repos",
required: true,
description: "the GitHub repo to load, represented as OWNER/NAME",
description:
"GitHub repos to load (one per argument), represented as OWNER/NAME",
},
];
@ -45,6 +48,12 @@ export default class PluginGraphCommand extends Command {
required: false,
options: pluginNames(),
}),
output: flags.string({
description:
"the GitHub repo under which to store output; " +
"required unless exactly one repository is specified",
required: false,
}),
"sourcecred-directory": sourcecredDirectoryFlag(),
"max-old-space-size": nodeMaxOldSpaceSizeFlag(),
"github-token": flags.string({
@ -58,30 +67,47 @@ export default class PluginGraphCommand extends Command {
async run() {
const {
args,
argv,
flags: {
output: defaultOutput,
"github-token": githubToken,
"sourcecred-directory": basedir,
"max-old-space-size": maxOldSpaceSize,
plugin,
},
} = this.parse(PluginGraphCommand);
const repo = stringToRepo(args.repo);
const repos = argv.map((s) => stringToRepo(s));
const outputRepo = (() => {
if (defaultOutput != null) {
return stringToRepo(defaultOutput);
} else if (repos.length === 1) {
return repos[0];
} else {
throw new Error("output repository not specified");
}
})();
if (!plugin) {
loadDefaultPlugins({
basedir,
plugin,
repo,
outputRepo,
repos,
githubToken,
maxOldSpaceSize,
});
} else {
loadPlugin({basedir, plugin, repo, githubToken});
loadPlugin({basedir, plugin, outputRepo, repos, githubToken});
}
}
}
function loadDefaultPlugins({basedir, repo, githubToken, maxOldSpaceSize}) {
function loadDefaultPlugins({
basedir,
outputRepo,
repos,
githubToken,
maxOldSpaceSize,
}) {
if (githubToken == null) {
// TODO: This check should be abstracted so that plugins can
// specify their argument dependencies and get nicely
@ -98,26 +124,28 @@ function loadDefaultPlugins({basedir, repo, githubToken, maxOldSpaceSize}) {
`--max_old_space_size=${maxOldSpaceSize}`,
"./bin/sourcecred.js",
"load",
repoToString(repo),
...repos.map((repo) => repoToString(repo)),
"--plugin",
pluginName,
"--github-token",
githubToken,
"--output",
repoToString(outputRepo),
],
deps: [],
})),
];
execDependencyGraph(tasks, {taskPassLabel: "DONE"}).then(({success}) => {
if (success) {
addToRepoRegistry({basedir, repo});
addToRepoRegistry({basedir, repo: outputRepo});
}
process.exitCode = success ? 0 : 1;
});
}
function loadPlugin({basedir, plugin, repo, githubToken}) {
function loadPlugin({basedir, plugin, outputRepo, repos, githubToken}) {
function scopedDirectory(key) {
const directory = path.join(basedir, key, repoToString(repo), plugin);
const directory = path.join(basedir, key, repoToString(outputRepo), plugin);
mkdirp.sync(directory);
return directory;
}
@ -135,14 +163,14 @@ function loadPlugin({basedir, plugin, repo, githubToken}) {
} else {
loadGithubData({
token: githubToken,
repo,
repos,
outputDirectory,
cacheDirectory,
});
}
break;
case "git":
loadGitData({repo, outputDirectory, cacheDirectory});
loadGitData({repos, outputDirectory, cacheDirectory});
break;
default:
console.error("fatal: Unknown plugin: " + (plugin: empty));

View File

@ -3,19 +3,23 @@
import fs from "fs-extra";
import path from "path";
import {Graph} from "../../core/graph";
import cloneAndLoadRepository from "./cloneAndLoadRepository";
import {createMinimalGraph} from "./createMinimalGraph";
import type {Repo} from "../../core/repo";
export type Options = {|
+repo: Repo,
+repos: $ReadOnlyArray<Repo>,
+outputDirectory: string,
+cacheDirectory: string,
|};
export function loadGitData(options: Options): Promise<void> {
const repository = cloneAndLoadRepository(options.repo);
const graph = createMinimalGraph(repository);
const graphs = options.repos.map((repo) => {
const repository = cloneAndLoadRepository(repo);
return createMinimalGraph(repository);
});
const graph = Graph.merge(graphs);
const blob = JSON.stringify(graph);
const outputFilename = path.join(options.outputDirectory, "graph.json");
return fs.writeFile(outputFilename, blob);

View File

@ -9,15 +9,26 @@ import type {Repo} from "../../core/repo";
export type Options = {|
+token: string,
+repo: Repo,
+repos: $ReadOnlyArray<Repo>,
+outputDirectory: string,
+cacheDirectory: string,
|};
export async function loadGithubData(options: Options): Promise<void> {
const response = await fetchGithubRepo(options.repo, options.token);
// We intentionally fetch repositories sequentially rather than in
// parallel, because GitHub asks that we not make concurrent
// requests. From <https://archive.is/LlkQp#88%>:
//
// > Make requests for a single user or client ID serially. Do not make
// > requests for a single user or client ID concurrently.
const responses = [];
for (const repo of options.repos) {
responses.push(await fetchGithubRepo(repo, options.token));
}
const view = new RelationalView();
view.addData(response);
for (const response of responses) {
view.addData(response);
}
const blob = JSON.stringify(view);
const outputFilename = path.join(options.outputDirectory, "view.json");
return fs.writeFile(outputFilename, blob);