fetchGithubRepo: use Mirror pipeline (#937)
Summary:
As of this commit, `node ./bin/sourcecred.js load` uses the Mirror code,
and the legacy continuation-fetching code is not included in the
`sourcecred.js` bundle. We do not yet perform the commit prefetching
described in #923. The code should be plenty fast for repositories that
merge pull requests at least occasionally.

Test Plan:
Running `yarn test --full` passes. Loading `sourcecred/sourcecred` works
and generates a reasonable credit attribution. Loading it again completes
immediately.

wchargin-branch: fetchGithubRepo-mirror
commit 08219f98bf
parent e2c99c418b
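Before the diffs below, here is a minimal sketch of the call shape this commit introduces for `fetchGithubRepo`: it now takes an options object with a GitHub token and a `cacheDirectory` where the Mirror keeps its SQLite database. The import paths, repository name, and token source in this sketch are illustrative assumptions, not part of the commit.

```js
// Hedged sketch of the new fetchGithubRepo call shape (see the diffs below).
// Import paths, the repo name, and the env-var token source are assumptions.
import tmp from "tmp";
import fetchGithubRepo from "./src/plugins/github/fetchGithubRepo";
import {makeRepoId} from "./src/core/repoId";

async function demo() {
  const repoId = makeRepoId("sourcecred", "sourcecred");
  const repository = await fetchGithubRepo(repoId, {
    token: String(process.env.SOURCECRED_GITHUB_TOKEN),
    // Throwaway cache for the demo; reuse a persistent directory to benefit
    // from the Mirror's incremental updates.
    cacheDirectory: tmp.dirSync().name,
  });
  return repository; // a GraphQL `Repository` object extracted from the Mirror
}
```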
@@ -29,9 +29,6 @@ module.exports = {
   backendEntryPoints: {
     sourcecred: resolveApp("src/cli/main.js"),
     //
-    testContinuations: resolveApp(
-      "src/plugins/github/bin/testContinuations.js"
-    ),
     generateGithubGraphqlFlowTypes: resolveApp(
       "src/plugins/github/bin/generateGraphqlFlowTypes.js"
     ),
[File diff suppressed because it is too large.]
@@ -12,8 +12,10 @@
  * from https://github.com/settings/tokens/new.
  */

-import fetchGithubRepo from "../fetchGithubRepo";
 import stringify from "json-stable-stringify";
+import tmp from "tmp";
+
+import fetchGithubRepo from "../fetchGithubRepo";
 import {makeRepoId} from "../../../core/repoId";

 function parseArgs() {
@@ -36,7 +38,8 @@ function parseArgs()
 function main() {
   const args = parseArgs();
   const repoId = makeRepoId(args.owner, args.name);
-  fetchGithubRepo(repoId, args.githubToken)
+  const options = {token: args.githubToken, cacheDirectory: tmp.dirSync().name};
+  fetchGithubRepo(repoId, options)
     .then((data) => {
       console.log(stringify(data, {space: 4}));
     })
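A note on the `tmp.dirSync().name` argument above: the `tmp` package synchronously creates a fresh temporary directory and returns a handle whose `name` field is its path, so each run of this debugging script gets a throwaway Mirror cache. A small sketch; the printed path is illustrative.

```js
// tmp.dirSync() creates a new temporary directory and returns an object
// whose `name` is the directory's path. The example output is illustrative.
import tmp from "tmp";

const dir = tmp.dirSync();
console.log(dir.name); // e.g. "/tmp/tmp-1234abcd" (actual path varies by system)
```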
@@ -1,106 +0,0 @@
-// @flow
-// Ad hoc testing script for RelationalView input format consistency.
-
-import Database from "better-sqlite3";
-import fs from "fs-extra";
-import stringify from "json-stable-stringify";
-import deepEqual from "lodash.isequal";
-
-import {makeRepoId} from "../../../core/repoId";
-import {Mirror} from "../../../graphql/mirror";
-import fetchGithubRepo, {postQuery} from "../fetchGithubRepo";
-import type {Repository} from "../graphqlTypes";
-import {RelationalView, type RelationalViewJSON} from "../relationalView";
-import githubSchema from "../schema";
-
-async function test(options: {|
-  +token: string,
-  +owner: string,
-  +name: string,
-  +graphqlId: string,
-  +outputFilepaths: {|
-    +continuations: string,
-    +mirror: string,
-  |},
-|}) {
-  async function fetchViaContinuations(): Promise<RelationalViewJSON> {
-    const raw = await fetchGithubRepo(
-      makeRepoId(options.owner, options.name),
-      options.token
-    );
-    const rv = new RelationalView();
-    rv.addData(raw);
-    return rv.toJSON();
-  }
-
-  async function fetchViaMirror(): Promise<RelationalViewJSON> {
-    const mirror = new Mirror(new Database(":memory:"), githubSchema());
-    mirror.registerObject({typename: "Repository", id: options.graphqlId});
-    await mirror.update((payload) => postQuery(payload, options.token), {
-      nodesLimit: 100,
-      nodesOfTypeLimit: 100,
-      connectionPageSize: 100,
-      connectionLimit: 100,
-      since: new Date(0),
-      now: () => new Date(),
-    });
-    const repository = ((mirror.extract(options.graphqlId): any): Repository);
-    const rv = new RelationalView();
-    rv.addRepository(repository);
-    return rv.toJSON();
-  }
-
-  function saveTo(filename: string, repo: RelationalViewJSON): Promise<void> {
-    return fs.writeFile(filename, stringify(repo));
-  }
-
-  const [viaContinuations, viaMirror] = await Promise.all([
-    fetchViaContinuations(),
-    fetchViaMirror(),
-  ]);
-
-  if (deepEqual(viaContinuations, viaMirror)) {
-    console.log("Identical. Saving to disk...");
-  } else {
-    console.log("Different. Saving to disk...");
-  }
-
-  await Promise.all([
-    saveTo(options.outputFilepaths.continuations, viaContinuations),
-    saveTo(options.outputFilepaths.mirror, viaMirror),
-  ]);
-}
-
-async function main() {
-  const args = process.argv.slice(2);
-  const token = process.env.SOURCECRED_GITHUB_TOKEN;
-  if (args.length !== 5 || token == null) {
-    const invocation = [
-      "SOURCECRED_GITHUB_TOKEN=<token>",
-      "node",
-      "test.js",
-      "REPO_OWNER",
-      "REPO_NAME",
-      "GRAPHQL_ID",
-      "CONTINUATIONS_OUTPUT_FILENAME",
-      "MIRROR_OUTPUT_FILENAME",
-    ];
-    console.error("usage: " + invocation.join(" "));
-    process.exitCode = 1;
-    return;
-  }
-  const [owner, name, graphqlId, continuations, mirror] = args;
-  const options = {
-    token,
-    owner,
-    name,
-    graphqlId,
-    outputFilepaths: {
-      continuations,
-      mirror,
-    },
-  };
-  await test(options);
-}
-
-main();
[File diff suppressed because it is too large.]
@@ -1,18 +1,18 @@
 // @flow

 import {RelationalView} from "../relationalView";
-import type {GithubResponseJSON} from "../graphql";
+import type {Repository} from "../graphqlTypes";
 import {Graph} from "../../../core/graph";
 import cloneDeep from "lodash.clonedeep";
 import {createGraph} from "../createGraph";

-export function exampleData(): GithubResponseJSON {
+export function exampleRepository(): Repository {
   return cloneDeep(require("./example-github"));
 }

 export function exampleRelationalView(): RelationalView {
   const rv = new RelationalView();
-  rv.addData(exampleData());
+  rv.addRepository(exampleRepository());
   return rv;
 }

@@ -4,13 +4,18 @@
  * docstring of the default export for more details.
  */

+import Database from "better-sqlite3";
 import fetch from "isomorphic-fetch";
+import path from "path";
 import retry from "retry";

+import {type RepoId, repoIdToString} from "../../core/repoId";
+import {Mirror} from "../../graphql/mirror";
+import * as Queries from "../../graphql/queries";
 import {stringify, inlineLayout, type Body} from "../../graphql/queries";
-import {createQuery, createVariables, postQueryExhaustive} from "./graphql";
-import type {GithubResponseJSON} from "./graphql";
-import type {RepoId} from "../../core/repoId";
+import * as Schema from "../../graphql/schema";
+import schema from "./schema";
+import type {Repository} from "./graphqlTypes";

 /**
  * Scrape data from a GitHub repo using the GitHub API.
@@ -25,27 +30,49 @@ import type {RepoId} from "../../core/repoId";
  * scraped from the repository, with data format to be specified
  * later
  */
-export default function fetchGithubRepo(
+export default async function fetchGithubRepo(
   repoId: RepoId,
-  token: string
-): Promise<GithubResponseJSON> {
-  token = String(token);
+  options: {|+token: string, +cacheDirectory: string|}
+): Promise<Repository> {
+  const {token, cacheDirectory} = options;

   const validToken = /^[A-Fa-f0-9]{40}$/;
   if (!validToken.test(token)) {
     throw new Error(`Invalid token: ${token}`);
   }
+  const postQueryWithToken = (payload) => postQuery(payload, token);

-  const body = createQuery();
-  const variables = createVariables(repoId);
-  const payload = {body, variables};
-  return postQueryExhaustive(
-    (somePayload) => postQuery(somePayload, token),
-    payload
-  ).then((x: GithubResponseJSON) => {
-    ensureNoMorePages(x);
-    return x;
+  const resolvedId: Schema.ObjectId = await resolveRepositoryGraphqlId(
+    postQueryWithToken,
+    repoId
+  );
+
+  // Key the cache file against the GraphQL ID, but make sure that the
+  // name is valid and uniquely identifying even on case-insensitive
+  // filesystems (HFS, HFS+, APFS, NTFS) or filesystems preventing
+  // equals signs in file names.
+  const dbFilename = `mirror_${Buffer.from(resolvedId).toString("hex")}.db`;
+  const db = new Database(path.join(cacheDirectory, dbFilename));
+  const mirror = new Mirror(db, schema());
+  mirror.registerObject({typename: "Repository", id: resolvedId});
+
+  // These are arbitrary tuning parameters.
+  // TODO(#638): Design a configuration system for plugins.
+  const ttlSeconds = 86400;
+  const nodesLimit = 100;
+  const connectionLimit = 100;
+
+  await mirror.update(postQueryWithToken, {
+    since: new Date(Date.now() - ttlSeconds * 1000),
+    now: () => new Date(),
+    // These properties are arbitrary tuning parameters.
+    nodesLimit,
+    connectionLimit,
+    // These values are the maxima allowed by GitHub.
+    nodesOfTypeLimit: 100,
+    connectionPageSize: 100,
   });
+  return ((mirror.extract(resolvedId): any): Repository);
 }

 const GITHUB_GRAPHQL_SERVER = "https://api.github.com/graphql";
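Two details of the hunk above are worth spelling out. First, the cache file is keyed by the repository's GraphQL node ID, hex-encoded so the name stays unique and valid even on case-insensitive filesystems; a sketch with a hypothetical ID:

```js
// Hypothetical GraphQL node ID; real IDs are opaque strings returned by GitHub.
const resolvedId = "abc";
const dbFilename = `mirror_${Buffer.from(resolvedId).toString("hex")}.db`;
// Buffer.from("abc").toString("hex") === "616263", so dbFilename === "mirror_616263.db".
```

Second, because `mirror.update` is called with `since: new Date(Date.now() - ttlSeconds * 1000)` and a 24-hour TTL, a second load of the same repository within that window mostly reads from the existing SQLite database, which is why repeat loads in the test plan complete quickly.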
@@ -185,23 +212,35 @@ export async function postQuery(
   );
 }

-function ensureNoMorePages(result: any, path = []) {
-  if (result == null) {
-    return;
-  }
-  if (result.pageInfo) {
-    if (result.pageInfo.hasNextPage) {
-      console.error(result);
-      throw new Error(`More pages at: ${path.join()}`);
-    }
-  }
-  if (Array.isArray(result)) {
-    result.forEach((item, i) => {
-      ensureNoMorePages(item, [...path, i]);
-    });
-  } else if (typeof result === "object") {
-    Object.keys(result).forEach((k) => {
-      ensureNoMorePages(result[k], [...path, k]);
-    });
+async function resolveRepositoryGraphqlId(
+  postQuery: ({+body: Body, +variables: mixed}) => Promise<any>,
+  repoId: RepoId
+): Promise<Schema.ObjectId> {
+  const b = Queries.build;
+  const payload = {
+    body: [
+      b.query(
+        "ResolveRepositoryId",
+        [b.param("owner", "String!"), b.param("name", "String!")],
+        [
+          b.field(
+            "repository",
+            {owner: b.variable("owner"), name: b.variable("name")},
+            [b.field("id")]
+          ),
+        ]
+      ),
+    ],
+    variables: {owner: repoId.owner, name: repoId.name},
+  };
+  const data: {|+repository: null | {|+id: string|}|} = await postQuery(
+    payload
+  );
+  if (data.repository == null) {
+    throw new Error(
+      `No such repository: ${repoIdToString(repoId)} ` +
+        `(response data: ${JSON.stringify(data)})`
+    );
   }
+  return data.repository.id;
 }
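For readability, the structured query assembled with `Queries.build` in `resolveRepositoryGraphqlId` above corresponds to roughly the following GraphQL document, shown here as a plain string; the exact serialization depends on the query printer.

```js
// Approximate GraphQL text for the ResolveRepositoryId query built above.
const resolveRepositoryIdQuery = `
  query ResolveRepositoryId($owner: String!, $name: String!) {
    repository(owner: $owner, name: $name) {
      id
    }
  }
`;
// It is posted with variables {owner: repoId.owner, name: repoId.name};
// the returned repository.id becomes the Mirror's root object ID.
```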
@@ -24,11 +24,16 @@ export async function loadGithubData(options: Options): Promise<void> {
   // > make requests for a single user or client ID concurrently.
   const responses = [];
   for (const repoId of options.repoIds) {
-    responses.push(await fetchGithubRepo(repoId, options.token));
+    responses.push(
+      await fetchGithubRepo(repoId, {
+        token: options.token,
+        cacheDirectory: options.cacheDirectory,
+      })
+    );
   }
   const view = new RelationalView();
   for (const response of responses) {
-    view.addData(response);
+    view.addRepository(response);
   }
   view.compressByRemovingBody();
   const blob: Uint8Array = pako.gzip(JSON.stringify(view));
@@ -2,7 +2,7 @@

 import * as R from "./relationalView";
 import * as N from "./nodes";
-import {exampleData, exampleRelationalView} from "./example/example";
+import {exampleRepository, exampleRelationalView} from "./example/example";
 import * as MapUtil from "../../util/map";

 describe("plugins/github/relationalView", () => {
@@ -276,8 +276,7 @@ describe("plugins/github/relationalView", () => {

   describe("reaction detection", () => {
     it("set of all reactions matches snapshot", () => {
-      const view = new R.RelationalView();
-      view.addData(exampleData());
+      const view = exampleRelationalView();
       const urlToReactions = new Map();
       for (const reactable of view.reactableEntities()) {
         const url = reactable.url();
@@ -289,28 +288,26 @@ describe("plugins/github/relationalView", () => {
     });
   });

-  it("addData is idempotent", () => {
+  it("addRepository is idempotent", () => {
     const rv1 = new R.RelationalView();
-    rv1.addData(exampleData());
+    rv1.addRepository(exampleRepository());
     const rv2 = new R.RelationalView();
-    rv2.addData(exampleData());
-    rv2.addData(exampleData());
+    rv2.addRepository(exampleRepository());
+    rv2.addRepository(exampleRepository());
     // may be fragile
     expect(rv1).toEqual(rv2);
   });

   describe("compressByRemovingBody", () => {
     it("doesn't mutate the original entries", () => {
-      const rv = new R.RelationalView();
-      rv.addData(exampleData());
+      const rv = exampleRelationalView();
       const issue0 = Array.from(rv.issues())[0];
       expect(issue0.body()).not.toEqual("");
       rv.compressByRemovingBody();
       expect(issue0.body()).not.toEqual("");
     });
     it("removes bodies from all posts", () => {
-      const rv = new R.RelationalView();
-      rv.addData(exampleData());
+      const rv = exampleRelationalView();
       function somePostsHaveBodies() {
         for (const posts of [
           rv.issues(),
@@ -331,8 +328,7 @@ describe("plugins/github/relationalView", () => {
       expect(somePostsHaveBodies()).toBe(false);
     });
     it("removes messages from all commits", () => {
-      const rv = new R.RelationalView();
-      rv.addData(exampleData());
+      const rv = exampleRelationalView();
       function someCommitsHaveMessages() {
         for (const commit of rv.commits()) {
           if (commit.message() !== "") {
@@ -1,15 +1,9 @@
 // @flow

-import {exampleData} from "./example/example";
-
 import translateContinuations from "./translateContinuations";

 describe("plugins/github/translateContinuations", () => {
   describe("translateContinuations", () => {
-    it("works on the example data", () => {
-      expect(translateContinuations(exampleData())).toMatchSnapshot();
-    });
-
     it("raises a warning if the defaultBranchRef is not a commit", () => {
       const exampleData = {
         repository: {