Allow combining repositories at load time (#711)
Summary: As a first pass toward support for analyzing whole organizations, we allow loading multiple repositories with `sourcecred load`, combining them into a single relational view and a single Git graph at load time. Test Plan: Run ``` node bin/sourcecred.js \ load \ sourcecred/example-git \ sourcecred/example-github \ sourcecred/sourcecred \ --output sourcecred/examples \ ; ``` and select `sourcecred/examples` from the web view. Filter “Repository” nodes, and note that there are three. Note that loading a single repository without `--output` still works, that loading a single repository with `--output` still works (respecting the alias name), and loading not exactly one repository without `--output` yields an appropriate error message. Note that `yarn sharness-full` still works. wchargin-branch: load-combined
This commit is contained in:
parent
2001d3a699
commit
761b5a0875
|
@ -1,6 +1,7 @@
|
|||
# Changelog
|
||||
|
||||
## [Unreleased]
|
||||
- Support combining multiple repositories into a single graph (#711)
|
||||
- Normalize scores so that 1000 cred is split amongst users (#709)
|
||||
- Stop persisting weights in local store (#706)
|
||||
- Execute GraphQL queries with exponential backoff (#699)
|
||||
|
|
10
README.md
10
README.md
|
@ -57,6 +57,16 @@ replacing the big string of zeros with your actual token.
|
|||
|
||||
[ipfs/js-ipfs]: https://github.com/ipfs/js-ipfs
|
||||
|
||||
You can also combine data from multiple repositories into a single graph.
|
||||
To do so, pass multiple repositories to the `load` command, and use the `--output` flag to specify an “output name” under which the combined graph will be stored.
|
||||
For instance, the invocation
|
||||
|
||||
```
|
||||
node bin/sourcecred.js load ipfs/js-ipfs ipfs/go-ipfs --output ipfs/meta-ipfs
|
||||
```
|
||||
|
||||
will create a graph called `ipfs/meta-ipfs` in the cred explorer, containing the combined contents of the js-ipfs and go-ipfs repositories.
|
||||
|
||||
## Early Adopters
|
||||
|
||||
We’re looking for projects that want to be early adopters of SourceCred!
|
||||
|
|
|
@ -30,11 +30,14 @@ const execDependencyGraph = require("../../tools/execDependencyGraph").default;
|
|||
export default class PluginGraphCommand extends Command {
|
||||
static description = "load data required for SourceCred";
|
||||
|
||||
static strict = false;
|
||||
|
||||
static args = [
|
||||
{
|
||||
name: "repo",
|
||||
name: "repos",
|
||||
required: true,
|
||||
description: "the GitHub repo to load, represented as OWNER/NAME",
|
||||
description:
|
||||
"GitHub repos to load (one per argument), represented as OWNER/NAME",
|
||||
},
|
||||
];
|
||||
|
||||
|
@ -45,6 +48,12 @@ export default class PluginGraphCommand extends Command {
|
|||
required: false,
|
||||
options: pluginNames(),
|
||||
}),
|
||||
output: flags.string({
|
||||
description:
|
||||
"the GitHub repo under which to store output; " +
|
||||
"required unless exactly one repository is specified",
|
||||
required: false,
|
||||
}),
|
||||
"sourcecred-directory": sourcecredDirectoryFlag(),
|
||||
"max-old-space-size": nodeMaxOldSpaceSizeFlag(),
|
||||
"github-token": flags.string({
|
||||
|
@ -58,30 +67,47 @@ export default class PluginGraphCommand extends Command {
|
|||
|
||||
async run() {
|
||||
const {
|
||||
args,
|
||||
argv,
|
||||
flags: {
|
||||
output: defaultOutput,
|
||||
"github-token": githubToken,
|
||||
"sourcecred-directory": basedir,
|
||||
"max-old-space-size": maxOldSpaceSize,
|
||||
plugin,
|
||||
},
|
||||
} = this.parse(PluginGraphCommand);
|
||||
const repo = stringToRepo(args.repo);
|
||||
const repos = argv.map((s) => stringToRepo(s));
|
||||
const outputRepo = (() => {
|
||||
if (defaultOutput != null) {
|
||||
return stringToRepo(defaultOutput);
|
||||
} else if (repos.length === 1) {
|
||||
return repos[0];
|
||||
} else {
|
||||
throw new Error("output repository not specified");
|
||||
}
|
||||
})();
|
||||
if (!plugin) {
|
||||
loadDefaultPlugins({
|
||||
basedir,
|
||||
plugin,
|
||||
repo,
|
||||
outputRepo,
|
||||
repos,
|
||||
githubToken,
|
||||
maxOldSpaceSize,
|
||||
});
|
||||
} else {
|
||||
loadPlugin({basedir, plugin, repo, githubToken});
|
||||
loadPlugin({basedir, plugin, outputRepo, repos, githubToken});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function loadDefaultPlugins({basedir, repo, githubToken, maxOldSpaceSize}) {
|
||||
function loadDefaultPlugins({
|
||||
basedir,
|
||||
outputRepo,
|
||||
repos,
|
||||
githubToken,
|
||||
maxOldSpaceSize,
|
||||
}) {
|
||||
if (githubToken == null) {
|
||||
// TODO: This check should be abstracted so that plugins can
|
||||
// specify their argument dependencies and get nicely
|
||||
|
@ -98,26 +124,28 @@ function loadDefaultPlugins({basedir, repo, githubToken, maxOldSpaceSize}) {
|
|||
`--max_old_space_size=${maxOldSpaceSize}`,
|
||||
"./bin/sourcecred.js",
|
||||
"load",
|
||||
repoToString(repo),
|
||||
...repos.map((repo) => repoToString(repo)),
|
||||
"--plugin",
|
||||
pluginName,
|
||||
"--github-token",
|
||||
githubToken,
|
||||
"--output",
|
||||
repoToString(outputRepo),
|
||||
],
|
||||
deps: [],
|
||||
})),
|
||||
];
|
||||
execDependencyGraph(tasks, {taskPassLabel: "DONE"}).then(({success}) => {
|
||||
if (success) {
|
||||
addToRepoRegistry({basedir, repo});
|
||||
addToRepoRegistry({basedir, repo: outputRepo});
|
||||
}
|
||||
process.exitCode = success ? 0 : 1;
|
||||
});
|
||||
}
|
||||
|
||||
function loadPlugin({basedir, plugin, repo, githubToken}) {
|
||||
function loadPlugin({basedir, plugin, outputRepo, repos, githubToken}) {
|
||||
function scopedDirectory(key) {
|
||||
const directory = path.join(basedir, key, repoToString(repo), plugin);
|
||||
const directory = path.join(basedir, key, repoToString(outputRepo), plugin);
|
||||
mkdirp.sync(directory);
|
||||
return directory;
|
||||
}
|
||||
|
@ -135,14 +163,14 @@ function loadPlugin({basedir, plugin, repo, githubToken}) {
|
|||
} else {
|
||||
loadGithubData({
|
||||
token: githubToken,
|
||||
repo,
|
||||
repos,
|
||||
outputDirectory,
|
||||
cacheDirectory,
|
||||
});
|
||||
}
|
||||
break;
|
||||
case "git":
|
||||
loadGitData({repo, outputDirectory, cacheDirectory});
|
||||
loadGitData({repos, outputDirectory, cacheDirectory});
|
||||
break;
|
||||
default:
|
||||
console.error("fatal: Unknown plugin: " + (plugin: empty));
|
||||
|
|
|
@ -3,19 +3,23 @@
|
|||
import fs from "fs-extra";
|
||||
import path from "path";
|
||||
|
||||
import {Graph} from "../../core/graph";
|
||||
import cloneAndLoadRepository from "./cloneAndLoadRepository";
|
||||
import {createMinimalGraph} from "./createMinimalGraph";
|
||||
import type {Repo} from "../../core/repo";
|
||||
|
||||
export type Options = {|
|
||||
+repo: Repo,
|
||||
+repos: $ReadOnlyArray<Repo>,
|
||||
+outputDirectory: string,
|
||||
+cacheDirectory: string,
|
||||
|};
|
||||
|
||||
export function loadGitData(options: Options): Promise<void> {
|
||||
const repository = cloneAndLoadRepository(options.repo);
|
||||
const graph = createMinimalGraph(repository);
|
||||
const graphs = options.repos.map((repo) => {
|
||||
const repository = cloneAndLoadRepository(repo);
|
||||
return createMinimalGraph(repository);
|
||||
});
|
||||
const graph = Graph.merge(graphs);
|
||||
const blob = JSON.stringify(graph);
|
||||
const outputFilename = path.join(options.outputDirectory, "graph.json");
|
||||
return fs.writeFile(outputFilename, blob);
|
||||
|
|
|
@ -9,15 +9,26 @@ import type {Repo} from "../../core/repo";
|
|||
|
||||
export type Options = {|
|
||||
+token: string,
|
||||
+repo: Repo,
|
||||
+repos: $ReadOnlyArray<Repo>,
|
||||
+outputDirectory: string,
|
||||
+cacheDirectory: string,
|
||||
|};
|
||||
|
||||
export async function loadGithubData(options: Options): Promise<void> {
|
||||
const response = await fetchGithubRepo(options.repo, options.token);
|
||||
// We intentionally fetch repositories sequentially rather than in
|
||||
// parallel, because GitHub asks that we not make concurrent
|
||||
// requests. From <https://archive.is/LlkQp#88%>:
|
||||
//
|
||||
// > Make requests for a single user or client ID serially. Do not make
|
||||
// > requests for a single user or client ID concurrently.
|
||||
const responses = [];
|
||||
for (const repo of options.repos) {
|
||||
responses.push(await fetchGithubRepo(repo, options.token));
|
||||
}
|
||||
const view = new RelationalView();
|
||||
view.addData(response);
|
||||
for (const response of responses) {
|
||||
view.addData(response);
|
||||
}
|
||||
const blob = JSON.stringify(view);
|
||||
const outputFilename = path.join(options.outputDirectory, "view.json");
|
||||
return fs.writeFile(outputFilename, blob);
|
||||
|
|
Loading…
Reference in New Issue