From 761b5a0875f037976f866a27897f05aef5b33cd9 Mon Sep 17 00:00:00 2001 From: William Chargin Date: Wed, 29 Aug 2018 14:52:26 -0700 Subject: [PATCH] Allow combining repositories at load time (#711) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: As a first pass toward support for analyzing whole organizations, we allow loading multiple repositories with `sourcecred load`, combining them into a single relational view and a single Git graph at load time. Test Plan: Run ``` node bin/sourcecred.js \ load \ sourcecred/example-git \ sourcecred/example-github \ sourcecred/sourcecred \ --output sourcecred/examples \ ; ``` and select `sourcecred/examples` from the web view. Filter “Repository” nodes, and note that there are three. Note that loading a single repository without `--output` still works, that loading a single repository with `--output` still works (respecting the alias name), and loading not exactly one repository without `--output` yields an appropriate error message. Note that `yarn sharness-full` still works. wchargin-branch: load-combined --- CHANGELOG.md | 1 + README.md | 10 ++++++ src/cli/commands/load.js | 54 +++++++++++++++++++++------- src/plugins/git/loadGitData.js | 10 ++++-- src/plugins/github/loadGithubData.js | 17 +++++++-- 5 files changed, 73 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eeed20f..f97cfa2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## [Unreleased] +- Support combining multiple repositories into a single graph (#711) - Normalize scores so that 1000 cred is split amongst users (#709) - Stop persisting weights in local store (#706) - Execute GraphQL queries with exponential backoff (#699) diff --git a/README.md b/README.md index 5f0afea..e7eec31 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,16 @@ replacing the big string of zeros with your actual token. 
[ipfs/js-ipfs]: https://github.com/ipfs/js-ipfs +You can also combine data from multiple repositories into a single graph. +To do so, pass multiple repositories to the `load` command, and specify an “output name” for the repository. +For instance, the invocation + +``` +node bin/sourcecred.js load ipfs/js-ipfs ipfs/go-ipfs --output ipfs/meta-ipfs +``` + +will create a graph called `ipfs/meta-ipfs` in the cred explorer, containing the combined contents of the js-ipfs and go-ipfs repositories. + ## Early Adopters We’re looking for projects who want to be early adopters of SourceCred! diff --git a/src/cli/commands/load.js b/src/cli/commands/load.js index ed6f384..fbef929 100644 --- a/src/cli/commands/load.js +++ b/src/cli/commands/load.js @@ -30,11 +30,14 @@ const execDependencyGraph = require("../../tools/execDependencyGraph").default; export default class PluginGraphCommand extends Command { static description = "load data required for SourceCred"; + static strict = false; + static args = [ { - name: "repo", + name: "repos", required: true, - description: "the GitHub repo to load, represented as OWNER/NAME", + description: + "GitHub repos to load (one per argument), represented as OWNER/NAME", }, ]; @@ -45,6 +48,12 @@ export default class PluginGraphCommand extends Command { required: false, options: pluginNames(), }), + output: flags.string({ + description: + "the GitHub repo under which to store output; " + + "required unless exactly one repository is specified", + required: false, + }), "sourcecred-directory": sourcecredDirectoryFlag(), "max-old-space-size": nodeMaxOldSpaceSizeFlag(), "github-token": flags.string({ @@ -58,30 +67,47 @@ export default class PluginGraphCommand extends Command { async run() { const { - args, + argv, flags: { + output: defaultOutput, "github-token": githubToken, "sourcecred-directory": basedir, "max-old-space-size": maxOldSpaceSize, plugin, }, } = this.parse(PluginGraphCommand); - const repo = stringToRepo(args.repo); + const repos = 
argv.map((s) => stringToRepo(s)); + const outputRepo = (() => { + if (defaultOutput != null) { + return stringToRepo(defaultOutput); + } else if (repos.length === 1) { + return repos[0]; + } else { + throw new Error("output repository not specified"); + } + })(); if (!plugin) { loadDefaultPlugins({ basedir, plugin, - repo, + outputRepo, + repos, githubToken, maxOldSpaceSize, }); } else { - loadPlugin({basedir, plugin, repo, githubToken}); + loadPlugin({basedir, plugin, outputRepo, repos, githubToken}); } } } -function loadDefaultPlugins({basedir, repo, githubToken, maxOldSpaceSize}) { +function loadDefaultPlugins({ + basedir, + outputRepo, + repos, + githubToken, + maxOldSpaceSize, +}) { if (githubToken == null) { // TODO: This check should be abstracted so that plugins can // specify their argument dependencies and get nicely @@ -98,26 +124,28 @@ function loadDefaultPlugins({basedir, repo, githubToken, maxOldSpaceSize}) { `--max_old_space_size=${maxOldSpaceSize}`, "./bin/sourcecred.js", "load", - repoToString(repo), + ...repos.map((repo) => repoToString(repo)), "--plugin", pluginName, "--github-token", githubToken, + "--output", + repoToString(outputRepo), ], deps: [], })), ]; execDependencyGraph(tasks, {taskPassLabel: "DONE"}).then(({success}) => { if (success) { - addToRepoRegistry({basedir, repo}); + addToRepoRegistry({basedir, repo: outputRepo}); } process.exitCode = success ? 
0 : 1; }); } -function loadPlugin({basedir, plugin, repo, githubToken}) { +function loadPlugin({basedir, plugin, outputRepo, repos, githubToken}) { function scopedDirectory(key) { - const directory = path.join(basedir, key, repoToString(repo), plugin); + const directory = path.join(basedir, key, repoToString(outputRepo), plugin); mkdirp.sync(directory); return directory; } @@ -135,14 +163,14 @@ function loadPlugin({basedir, plugin, repo, githubToken}) { } else { loadGithubData({ token: githubToken, - repo, + repos, outputDirectory, cacheDirectory, }); } break; case "git": - loadGitData({repo, outputDirectory, cacheDirectory}); + loadGitData({repos, outputDirectory, cacheDirectory}); break; default: console.error("fatal: Unknown plugin: " + (plugin: empty)); diff --git a/src/plugins/git/loadGitData.js b/src/plugins/git/loadGitData.js index 6368b05..5eee1cf 100644 --- a/src/plugins/git/loadGitData.js +++ b/src/plugins/git/loadGitData.js @@ -3,19 +3,23 @@ import fs from "fs-extra"; import path from "path"; +import {Graph} from "../../core/graph"; import cloneAndLoadRepository from "./cloneAndLoadRepository"; import {createMinimalGraph} from "./createMinimalGraph"; import type {Repo} from "../../core/repo"; export type Options = {| - +repo: Repo, + +repos: $ReadOnlyArray<Repo>, +outputDirectory: string, +cacheDirectory: string, |}; export function loadGitData(options: Options): Promise<void> { - const repository = cloneAndLoadRepository(options.repo); - const graph = createMinimalGraph(repository); + const graphs = options.repos.map((repo) => { + const repository = cloneAndLoadRepository(repo); + return createMinimalGraph(repository); + }); + const graph = Graph.merge(graphs); const blob = JSON.stringify(graph); const outputFilename = path.join(options.outputDirectory, "graph.json"); return fs.writeFile(outputFilename, blob); diff --git a/src/plugins/github/loadGithubData.js b/src/plugins/github/loadGithubData.js index c0fc7e2..752d35e 100644 --- 
a/src/plugins/github/loadGithubData.js +++ b/src/plugins/github/loadGithubData.js @@ -9,15 +9,26 @@ import type {Repo} from "../../core/repo"; export type Options = {| +token: string, - +repo: Repo, + +repos: $ReadOnlyArray<Repo>, +outputDirectory: string, +cacheDirectory: string, |}; export async function loadGithubData(options: Options): Promise<void> { - const response = await fetchGithubRepo(options.repo, options.token); + // We intentionally fetch repositories sequentially rather than in + // parallel, because GitHub asks that we not make concurrent + // requests. From <https://developer.github.com/v3/guides/best-practices-for-integrators/#dealing-with-abuse-rate-limits>: + // + // > Make requests for a single user or client ID serially. Do not make + // > requests for a single user or client ID concurrently. + const responses = []; + for (const repo of options.repos) { + responses.push(await fetchGithubRepo(repo, options.token)); + } const view = new RelationalView(); - view.addData(response); + for (const response of responses) { + view.addData(response); + } const blob = JSON.stringify(view); const outputFilename = path.join(options.outputDirectory, "view.json"); return fs.writeFile(outputFilename, blob);