fetchGithubRepo: use Mirror pipeline (#937)
Summary:
As of this commit, `node ./bin/sourcecred.js load` uses the Mirror code, and the legacy continuation-fetching code is not included in the `sourcecred.js` bundle. We do not yet perform the commit prefetching described in #923. The code should be plenty fast for repositories that merge pull requests at least occasionally.

Test Plan:
Running `yarn test --full` passes. Loading `sourcecred/sourcecred` works and generates a reasonable credit attribution. Loading it again completes immediately.

wchargin-branch: fetchGithubRepo-mirror
This commit is contained in:
parent
e2c99c418b
commit
08219f98bf
|
@ -29,9 +29,6 @@ module.exports = {
|
|||
backendEntryPoints: {
|
||||
sourcecred: resolveApp("src/cli/main.js"),
|
||||
//
|
||||
testContinuations: resolveApp(
|
||||
"src/plugins/github/bin/testContinuations.js"
|
||||
),
|
||||
generateGithubGraphqlFlowTypes: resolveApp(
|
||||
"src/plugins/github/bin/generateGraphqlFlowTypes.js"
|
||||
),
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -12,8 +12,10 @@
|
|||
* from https://github.com/settings/tokens/new.
|
||||
*/
|
||||
|
||||
import fetchGithubRepo from "../fetchGithubRepo";
|
||||
import stringify from "json-stable-stringify";
|
||||
import tmp from "tmp";
|
||||
|
||||
import fetchGithubRepo from "../fetchGithubRepo";
|
||||
import {makeRepoId} from "../../../core/repoId";
|
||||
|
||||
function parseArgs() {
|
||||
|
@ -36,7 +38,8 @@ function parseArgs() {
|
|||
function main() {
|
||||
const args = parseArgs();
|
||||
const repoId = makeRepoId(args.owner, args.name);
|
||||
fetchGithubRepo(repoId, args.githubToken)
|
||||
const options = {token: args.githubToken, cacheDirectory: tmp.dirSync().name};
|
||||
fetchGithubRepo(repoId, options)
|
||||
.then((data) => {
|
||||
console.log(stringify(data, {space: 4}));
|
||||
})
|
||||
|
|
|
@ -1,106 +0,0 @@
|
|||
// @flow
|
||||
// Ad hoc testing script for RelationalView input format consistency.
|
||||
|
||||
import Database from "better-sqlite3";
|
||||
import fs from "fs-extra";
|
||||
import stringify from "json-stable-stringify";
|
||||
import deepEqual from "lodash.isequal";
|
||||
|
||||
import {makeRepoId} from "../../../core/repoId";
|
||||
import {Mirror} from "../../../graphql/mirror";
|
||||
import fetchGithubRepo, {postQuery} from "../fetchGithubRepo";
|
||||
import type {Repository} from "../graphqlTypes";
|
||||
import {RelationalView, type RelationalViewJSON} from "../relationalView";
|
||||
import githubSchema from "../schema";
|
||||
|
||||
async function test(options: {|
|
||||
+token: string,
|
||||
+owner: string,
|
||||
+name: string,
|
||||
+graphqlId: string,
|
||||
+outputFilepaths: {|
|
||||
+continuations: string,
|
||||
+mirror: string,
|
||||
|},
|
||||
|}) {
|
||||
async function fetchViaContinuations(): Promise<RelationalViewJSON> {
|
||||
const raw = await fetchGithubRepo(
|
||||
makeRepoId(options.owner, options.name),
|
||||
options.token
|
||||
);
|
||||
const rv = new RelationalView();
|
||||
rv.addData(raw);
|
||||
return rv.toJSON();
|
||||
}
|
||||
|
||||
async function fetchViaMirror(): Promise<RelationalViewJSON> {
|
||||
const mirror = new Mirror(new Database(":memory:"), githubSchema());
|
||||
mirror.registerObject({typename: "Repository", id: options.graphqlId});
|
||||
await mirror.update((payload) => postQuery(payload, options.token), {
|
||||
nodesLimit: 100,
|
||||
nodesOfTypeLimit: 100,
|
||||
connectionPageSize: 100,
|
||||
connectionLimit: 100,
|
||||
since: new Date(0),
|
||||
now: () => new Date(),
|
||||
});
|
||||
const repository = ((mirror.extract(options.graphqlId): any): Repository);
|
||||
const rv = new RelationalView();
|
||||
rv.addRepository(repository);
|
||||
return rv.toJSON();
|
||||
}
|
||||
|
||||
function saveTo(filename: string, repo: RelationalViewJSON): Promise<void> {
|
||||
return fs.writeFile(filename, stringify(repo));
|
||||
}
|
||||
|
||||
const [viaContinuations, viaMirror] = await Promise.all([
|
||||
fetchViaContinuations(),
|
||||
fetchViaMirror(),
|
||||
]);
|
||||
|
||||
if (deepEqual(viaContinuations, viaMirror)) {
|
||||
console.log("Identical. Saving to disk...");
|
||||
} else {
|
||||
console.log("Different. Saving to disk...");
|
||||
}
|
||||
|
||||
await Promise.all([
|
||||
saveTo(options.outputFilepaths.continuations, viaContinuations),
|
||||
saveTo(options.outputFilepaths.mirror, viaMirror),
|
||||
]);
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const args = process.argv.slice(2);
|
||||
const token = process.env.SOURCECRED_GITHUB_TOKEN;
|
||||
if (args.length !== 5 || token == null) {
|
||||
const invocation = [
|
||||
"SOURCECRED_GITHUB_TOKEN=<token>",
|
||||
"node",
|
||||
"test.js",
|
||||
"REPO_OWNER",
|
||||
"REPO_NAME",
|
||||
"GRAPHQL_ID",
|
||||
"CONTINUATIONS_OUTPUT_FILENAME",
|
||||
"MIRROR_OUTPUT_FILENAME",
|
||||
];
|
||||
console.error("usage: " + invocation.join(" "));
|
||||
process.exitCode = 1;
|
||||
return;
|
||||
}
|
||||
const [owner, name, graphqlId, continuations, mirror] = args;
|
||||
const options = {
|
||||
token,
|
||||
owner,
|
||||
name,
|
||||
graphqlId,
|
||||
outputFilepaths: {
|
||||
continuations,
|
||||
mirror,
|
||||
},
|
||||
};
|
||||
await test(options);
|
||||
}
|
||||
|
||||
main();
|
File diff suppressed because it is too large
Load Diff
|
@ -1,18 +1,18 @@
|
|||
// @flow
|
||||
|
||||
import {RelationalView} from "../relationalView";
|
||||
import type {GithubResponseJSON} from "../graphql";
|
||||
import type {Repository} from "../graphqlTypes";
|
||||
import {Graph} from "../../../core/graph";
|
||||
import cloneDeep from "lodash.clonedeep";
|
||||
import {createGraph} from "../createGraph";
|
||||
|
||||
export function exampleData(): GithubResponseJSON {
|
||||
export function exampleRepository(): Repository {
|
||||
return cloneDeep(require("./example-github"));
|
||||
}
|
||||
|
||||
export function exampleRelationalView(): RelationalView {
|
||||
const rv = new RelationalView();
|
||||
rv.addData(exampleData());
|
||||
rv.addRepository(exampleRepository());
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
|
|
@ -4,13 +4,18 @@
|
|||
* docstring of the default export for more details.
|
||||
*/
|
||||
|
||||
import Database from "better-sqlite3";
|
||||
import fetch from "isomorphic-fetch";
|
||||
import path from "path";
|
||||
import retry from "retry";
|
||||
|
||||
import {type RepoId, repoIdToString} from "../../core/repoId";
|
||||
import {Mirror} from "../../graphql/mirror";
|
||||
import * as Queries from "../../graphql/queries";
|
||||
import {stringify, inlineLayout, type Body} from "../../graphql/queries";
|
||||
import {createQuery, createVariables, postQueryExhaustive} from "./graphql";
|
||||
import type {GithubResponseJSON} from "./graphql";
|
||||
import type {RepoId} from "../../core/repoId";
|
||||
import * as Schema from "../../graphql/schema";
|
||||
import schema from "./schema";
|
||||
import type {Repository} from "./graphqlTypes";
|
||||
|
||||
/**
|
||||
* Scrape data from a GitHub repo using the GitHub API.
|
||||
|
@ -25,27 +30,49 @@ import type {RepoId} from "../../core/repoId";
|
|||
* scraped from the repository, with data format to be specified
|
||||
* later
|
||||
*/
|
||||
export default function fetchGithubRepo(
|
||||
export default async function fetchGithubRepo(
|
||||
repoId: RepoId,
|
||||
token: string
|
||||
): Promise<GithubResponseJSON> {
|
||||
token = String(token);
|
||||
options: {|+token: string, +cacheDirectory: string|}
|
||||
): Promise<Repository> {
|
||||
const {token, cacheDirectory} = options;
|
||||
|
||||
const validToken = /^[A-Fa-f0-9]{40}$/;
|
||||
if (!validToken.test(token)) {
|
||||
throw new Error(`Invalid token: ${token}`);
|
||||
}
|
||||
const postQueryWithToken = (payload) => postQuery(payload, token);
|
||||
|
||||
const body = createQuery();
|
||||
const variables = createVariables(repoId);
|
||||
const payload = {body, variables};
|
||||
return postQueryExhaustive(
|
||||
(somePayload) => postQuery(somePayload, token),
|
||||
payload
|
||||
).then((x: GithubResponseJSON) => {
|
||||
ensureNoMorePages(x);
|
||||
return x;
|
||||
const resolvedId: Schema.ObjectId = await resolveRepositoryGraphqlId(
|
||||
postQueryWithToken,
|
||||
repoId
|
||||
);
|
||||
|
||||
// Key the cache file against the GraphQL ID, but make sure that the
|
||||
// name is valid and uniquely identifying even on case-insensitive
|
||||
// filesystems (HFS, HFS+, APFS, NTFS) or filesystems preventing
|
||||
// equals signs in file names.
|
||||
const dbFilename = `mirror_${Buffer.from(resolvedId).toString("hex")}.db`;
|
||||
const db = new Database(path.join(cacheDirectory, dbFilename));
|
||||
const mirror = new Mirror(db, schema());
|
||||
mirror.registerObject({typename: "Repository", id: resolvedId});
|
||||
|
||||
// These are arbitrary tuning parameters.
|
||||
// TODO(#638): Design a configuration system for plugins.
|
||||
const ttlSeconds = 86400;
|
||||
const nodesLimit = 100;
|
||||
const connectionLimit = 100;
|
||||
|
||||
await mirror.update(postQueryWithToken, {
|
||||
since: new Date(Date.now() - ttlSeconds * 1000),
|
||||
now: () => new Date(),
|
||||
// These properties are arbitrary tuning parameters.
|
||||
nodesLimit,
|
||||
connectionLimit,
|
||||
// These values are the maxima allowed by GitHub.
|
||||
nodesOfTypeLimit: 100,
|
||||
connectionPageSize: 100,
|
||||
});
|
||||
return ((mirror.extract(resolvedId): any): Repository);
|
||||
}
|
||||
|
||||
const GITHUB_GRAPHQL_SERVER = "https://api.github.com/graphql";
|
||||
|
@ -185,23 +212,35 @@ export async function postQuery(
|
|||
);
|
||||
}
|
||||
|
||||
function ensureNoMorePages(result: any, path = []) {
|
||||
if (result == null) {
|
||||
return;
|
||||
}
|
||||
if (result.pageInfo) {
|
||||
if (result.pageInfo.hasNextPage) {
|
||||
console.error(result);
|
||||
throw new Error(`More pages at: ${path.join()}`);
|
||||
}
|
||||
}
|
||||
if (Array.isArray(result)) {
|
||||
result.forEach((item, i) => {
|
||||
ensureNoMorePages(item, [...path, i]);
|
||||
});
|
||||
} else if (typeof result === "object") {
|
||||
Object.keys(result).forEach((k) => {
|
||||
ensureNoMorePages(result[k], [...path, k]);
|
||||
});
|
||||
async function resolveRepositoryGraphqlId(
|
||||
postQuery: ({+body: Body, +variables: mixed}) => Promise<any>,
|
||||
repoId: RepoId
|
||||
): Promise<Schema.ObjectId> {
|
||||
const b = Queries.build;
|
||||
const payload = {
|
||||
body: [
|
||||
b.query(
|
||||
"ResolveRepositoryId",
|
||||
[b.param("owner", "String!"), b.param("name", "String!")],
|
||||
[
|
||||
b.field(
|
||||
"repository",
|
||||
{owner: b.variable("owner"), name: b.variable("name")},
|
||||
[b.field("id")]
|
||||
),
|
||||
]
|
||||
),
|
||||
],
|
||||
variables: {owner: repoId.owner, name: repoId.name},
|
||||
};
|
||||
const data: {|+repository: null | {|+id: string|}|} = await postQuery(
|
||||
payload
|
||||
);
|
||||
if (data.repository == null) {
|
||||
throw new Error(
|
||||
`No such repository: ${repoIdToString(repoId)} ` +
|
||||
`(response data: ${JSON.stringify(data)})`
|
||||
);
|
||||
}
|
||||
return data.repository.id;
|
||||
}
|
||||
|
|
|
@ -24,11 +24,16 @@ export async function loadGithubData(options: Options): Promise<void> {
|
|||
// > make requests for a single user or client ID concurrently.
|
||||
const responses = [];
|
||||
for (const repoId of options.repoIds) {
|
||||
responses.push(await fetchGithubRepo(repoId, options.token));
|
||||
responses.push(
|
||||
await fetchGithubRepo(repoId, {
|
||||
token: options.token,
|
||||
cacheDirectory: options.cacheDirectory,
|
||||
})
|
||||
);
|
||||
}
|
||||
const view = new RelationalView();
|
||||
for (const response of responses) {
|
||||
view.addData(response);
|
||||
view.addRepository(response);
|
||||
}
|
||||
view.compressByRemovingBody();
|
||||
const blob: Uint8Array = pako.gzip(JSON.stringify(view));
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
import * as R from "./relationalView";
|
||||
import * as N from "./nodes";
|
||||
import {exampleData, exampleRelationalView} from "./example/example";
|
||||
import {exampleRepository, exampleRelationalView} from "./example/example";
|
||||
import * as MapUtil from "../../util/map";
|
||||
|
||||
describe("plugins/github/relationalView", () => {
|
||||
|
@ -276,8 +276,7 @@ describe("plugins/github/relationalView", () => {
|
|||
|
||||
describe("reaction detection", () => {
|
||||
it("set of all reactions matches snapshot", () => {
|
||||
const view = new R.RelationalView();
|
||||
view.addData(exampleData());
|
||||
const view = exampleRelationalView();
|
||||
const urlToReactions = new Map();
|
||||
for (const reactable of view.reactableEntities()) {
|
||||
const url = reactable.url();
|
||||
|
@ -289,28 +288,26 @@ describe("plugins/github/relationalView", () => {
|
|||
});
|
||||
});
|
||||
|
||||
it("addData is idempotent", () => {
|
||||
it("addRepository is idempotent", () => {
|
||||
const rv1 = new R.RelationalView();
|
||||
rv1.addData(exampleData());
|
||||
rv1.addRepository(exampleRepository());
|
||||
const rv2 = new R.RelationalView();
|
||||
rv2.addData(exampleData());
|
||||
rv2.addData(exampleData());
|
||||
rv2.addRepository(exampleRepository());
|
||||
rv2.addRepository(exampleRepository());
|
||||
// may be fragile
|
||||
expect(rv1).toEqual(rv2);
|
||||
});
|
||||
|
||||
describe("compressByRemovingBody", () => {
|
||||
it("doesn't mutate the original entries", () => {
|
||||
const rv = new R.RelationalView();
|
||||
rv.addData(exampleData());
|
||||
const rv = exampleRelationalView();
|
||||
const issue0 = Array.from(rv.issues())[0];
|
||||
expect(issue0.body()).not.toEqual("");
|
||||
rv.compressByRemovingBody();
|
||||
expect(issue0.body()).not.toEqual("");
|
||||
});
|
||||
it("removes bodies from all posts", () => {
|
||||
const rv = new R.RelationalView();
|
||||
rv.addData(exampleData());
|
||||
const rv = exampleRelationalView();
|
||||
function somePostsHaveBodies() {
|
||||
for (const posts of [
|
||||
rv.issues(),
|
||||
|
@ -331,8 +328,7 @@ describe("plugins/github/relationalView", () => {
|
|||
expect(somePostsHaveBodies()).toBe(false);
|
||||
});
|
||||
it("removes messages from all commits", () => {
|
||||
const rv = new R.RelationalView();
|
||||
rv.addData(exampleData());
|
||||
const rv = exampleRelationalView();
|
||||
function someCommitsHaveMessages() {
|
||||
for (const commit of rv.commits()) {
|
||||
if (commit.message() !== "") {
|
||||
|
|
|
@ -1,15 +1,9 @@
|
|||
// @flow
|
||||
|
||||
import {exampleData} from "./example/example";
|
||||
|
||||
import translateContinuations from "./translateContinuations";
|
||||
|
||||
describe("plugins/github/translateContinuations", () => {
|
||||
describe("translateContinuations", () => {
|
||||
it("works on the example data", () => {
|
||||
expect(translateContinuations(exampleData())).toMatchSnapshot();
|
||||
});
|
||||
|
||||
it("raises a warning if the defaultBranchRef is not a commit", () => {
|
||||
const exampleData = {
|
||||
repository: {
|
||||
|
|
Loading…
Reference in New Issue