relationalView: use new data format (#934)

Summary:
This makes significant progress toward #923. As of this commit, it is
possible to use the Mirror module for the whole loading pipeline. This
process may be slow for repositories that do not use pull requests at
all (more precisely, that have large connected commit subgraphs none of
whose nodes is the merge commit of a pull request; see #920 for details)
so it is not yet the default codepath.

Test Plan:
Existing unit tests should suffice. For extra testing, I’ve added a
script that fetches a repository both via the old continuations logic
and the new Mirror logic, then constructs relational views and checks
whether the data is the same. For `example-github`, the views are
identical. For `sourcecred`, they are not: the old continuations logic
erroneously omits two commits, which the Mirror logic includes.

You can run the test like this:

```
$ node ./bin/testContinuations.js \
> sourcecred sourcecred MDEwOlJlcG9zaXRvcnkxMjAxNDU1NzA= \
> /tmp/continuations.json /tmp/mirror.json \
> 2> >(jq . >&2)
{
  "child": "0d38dde23a6de831315f3643a7d2bc15e8df7678",
  "parent": "cb8ba0eaa1abc1f921e7165bb19e29b40723ce65",
  "type": "UNKNOWN_PARENT_OID"
}
{
  "child": "d152f48ce4c2ed1d046bf6ed4f139e7e393ea660",
  "parent": "de7a8723963d9cd0437ef34f5942a071b850c0e7",
  "type": "UNKNOWN_PARENT_OID"
}
Different. Saving to disk...
```

Use `diff -u <(jq . /tmp/continuations.json) <(jq . /tmp/mirror.json)`
to inspect the differences, and note that exactly the two missing
commits have been added and that there are no other changes. (The diff
is small: just 51 lines of nicely formatted JSON.) The full log is here:
<https://gist.github.com/wchargin/e159cac9dcf3cc3b1efbd54f59e24e0b>

I also generated the `sourcecred/sourcecred` cred attribution and viewed
it with `yarn start`, which seems to work fine.

wchargin-branch: relationalview-new-data-format
This commit is contained in:
William Chargin 2018-10-23 16:42:49 -07:00 committed by GitHub
parent 58e98124ac
commit e2c99c418b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 253 additions and 38 deletions

View File

@ -29,6 +29,9 @@ module.exports = {
backendEntryPoints: { backendEntryPoints: {
sourcecred: resolveApp("src/cli/main.js"), sourcecred: resolveApp("src/cli/main.js"),
// //
testContinuations: resolveApp(
"src/plugins/github/bin/testContinuations.js"
),
generateGithubGraphqlFlowTypes: resolveApp( generateGithubGraphqlFlowTypes: resolveApp(
"src/plugins/github/bin/generateGraphqlFlowTypes.js" "src/plugins/github/bin/generateGraphqlFlowTypes.js"
), ),

View File

@ -0,0 +1,106 @@
// @flow
// Ad hoc testing script for RelationalView input format consistency.
import Database from "better-sqlite3";
import fs from "fs-extra";
import stringify from "json-stable-stringify";
import deepEqual from "lodash.isequal";
import {makeRepoId} from "../../../core/repoId";
import {Mirror} from "../../../graphql/mirror";
import fetchGithubRepo, {postQuery} from "../fetchGithubRepo";
import type {Repository} from "../graphqlTypes";
import {RelationalView, type RelationalViewJSON} from "../relationalView";
import githubSchema from "../schema";
async function test(options: {|
+token: string,
+owner: string,
+name: string,
+graphqlId: string,
+outputFilepaths: {|
+continuations: string,
+mirror: string,
|},
|}) {
async function fetchViaContinuations(): Promise<RelationalViewJSON> {
const raw = await fetchGithubRepo(
makeRepoId(options.owner, options.name),
options.token
);
const rv = new RelationalView();
rv.addData(raw);
return rv.toJSON();
}
async function fetchViaMirror(): Promise<RelationalViewJSON> {
const mirror = new Mirror(new Database(":memory:"), githubSchema());
mirror.registerObject({typename: "Repository", id: options.graphqlId});
await mirror.update((payload) => postQuery(payload, options.token), {
nodesLimit: 100,
nodesOfTypeLimit: 100,
connectionPageSize: 100,
connectionLimit: 100,
since: new Date(0),
now: () => new Date(),
});
const repository = ((mirror.extract(options.graphqlId): any): Repository);
const rv = new RelationalView();
rv.addRepository(repository);
return rv.toJSON();
}
function saveTo(filename: string, repo: RelationalViewJSON): Promise<void> {
return fs.writeFile(filename, stringify(repo));
}
const [viaContinuations, viaMirror] = await Promise.all([
fetchViaContinuations(),
fetchViaMirror(),
]);
if (deepEqual(viaContinuations, viaMirror)) {
console.log("Identical. Saving to disk...");
} else {
console.log("Different. Saving to disk...");
}
await Promise.all([
saveTo(options.outputFilepaths.continuations, viaContinuations),
saveTo(options.outputFilepaths.mirror, viaMirror),
]);
}
async function main() {
const args = process.argv.slice(2);
const token = process.env.SOURCECRED_GITHUB_TOKEN;
if (args.length !== 5 || token == null) {
const invocation = [
"SOURCECRED_GITHUB_TOKEN=<token>",
"node",
"test.js",
"REPO_OWNER",
"REPO_NAME",
"GRAPHQL_ID",
"CONTINUATIONS_OUTPUT_FILENAME",
"MIRROR_OUTPUT_FILENAME",
];
console.error("usage: " + invocation.join(" "));
process.exitCode = 1;
return;
}
const [owner, name, graphqlId, continuations, mirror] = args;
const options = {
token,
owner,
name,
graphqlId,
outputFilepaths: {
continuations,
mirror,
},
};
await test(options);
}
main();

View File

@ -15,23 +15,15 @@ import type {
CommentAddress, CommentAddress,
UserlikeAddress, UserlikeAddress,
} from "./nodes"; } from "./nodes";
import type { import * as T from "./graphqlTypes";
GithubResponseJSON, import type {GithubResponseJSON} from "./graphql";
RepositoryJSON,
IssueJSON,
PullJSON,
ReviewJSON,
CommentJSON,
CommitJSON,
NullableAuthorJSON,
ReviewState,
ReactionJSON,
ReactionContent,
} from "./graphql";
import * as GitNode from "../git/nodes"; import * as GitNode from "../git/nodes";
import * as MapUtil from "../../util/map"; import * as MapUtil from "../../util/map";
import * as NullUtil from "../../util/null";
import {botSet} from "./bots"; import {botSet} from "./bots";
import translateContinuations from "./translateContinuations";
import { import {
reviewUrlToId, reviewUrlToId,
issueCommentUrlToId, issueCommentUrlToId,
@ -72,7 +64,15 @@ export class RelationalView {
// state. for example, if called with {repo: {issues: [1,2,3]}} and then with // state. for example, if called with {repo: {issues: [1,2,3]}} and then with
// {repo: {issues: [4, 5]}}, then calls to repo.issues() will only give back // {repo: {issues: [4, 5]}}, then calls to repo.issues() will only give back
// issues 4 and 5 (although issues 1, 2, and 3 will still be in the view) // issues 4 and 5 (although issues 1, 2, and 3 will still be in the view)
this._addRepo(data.repository); const {result: repository, warnings} = translateContinuations(data);
for (const warning of warnings) {
console.warn(stringify(warning));
}
this.addRepository(repository);
}
addRepository(repository: T.Repository): void {
this._addRepo(repository);
this._addReferences(); this._addReferences();
} }
@ -293,29 +293,33 @@ export class RelationalView {
return rv; return rv;
} }
_addRepo(json: RepositoryJSON) { _addRepo(json: T.Repository) {
const address: RepoAddress = { const address: RepoAddress = {
type: N.REPO_TYPE, type: N.REPO_TYPE,
owner: json.owner.login, owner: NullUtil.get(json.owner).login,
name: json.name, name: json.name,
}; };
const entry: RepoEntry = { const entry: RepoEntry = {
address, address,
url: json.url, url: json.url,
issues: json.issues.nodes.map((x) => this._addIssue(address, x)), issues: expectAllNonNull(json, "issues", json.issues).map((x) =>
pulls: json.pulls.nodes.map((x) => this._addPull(address, x)), this._addIssue(address, x)
),
pulls: expectAllNonNull(json, "pullRequests", json.pullRequests).map(
(x) => this._addPull(address, x)
),
}; };
const raw = N.toRaw(address); const raw = N.toRaw(address);
this._repos.set(raw, entry); this._repos.set(raw, entry);
this._addCommitHistory(json); this._addCommitHistory(json);
} }
_addCommitHistory(json: RepositoryJSON) { _addCommitHistory(json: T.Repository) {
if (json.defaultBranchRef != null) { if (json.defaultBranchRef != null) {
const target = json.defaultBranchRef.target; const target = json.defaultBranchRef.target;
if (target.__typename === "Commit") { if (target != null && target.__typename === "Commit") {
target.history.nodes.forEach((commit) => this._addCommit(commit)); this._addCommit(target);
} else { } else if (target != null) {
throw new Error( throw new Error(
dedent`\ dedent`\
Your repo doesn't have a commit as its defaultBranchRef's target. \ Your repo doesn't have a commit as its defaultBranchRef's target. \
@ -327,7 +331,7 @@ export class RelationalView {
} }
} }
_addIssue(repo: RepoAddress, json: IssueJSON): IssueAddress { _addIssue(repo: RepoAddress, json: T.Issue): IssueAddress {
const address: IssueAddress = { const address: IssueAddress = {
type: N.ISSUE_TYPE, type: N.ISSUE_TYPE,
number: String(json.number), number: String(json.number),
@ -336,18 +340,53 @@ export class RelationalView {
const entry: IssueEntry = { const entry: IssueEntry = {
address, address,
url: json.url, url: json.url,
comments: json.comments.nodes.map((x) => this._addComment(address, x)), comments: expectAllNonNull(json, "comments", json.comments).map((x) =>
this._addComment(address, x)
),
authors: this._addNullableAuthor(json.author), authors: this._addNullableAuthor(json.author),
body: json.body, body: json.body,
title: json.title, title: json.title,
reactions: json.reactions.nodes.map((x) => this._addReaction(x)), reactions: expectAllNonNull(json, "reactions", json.reactions).map((x) =>
this._addReaction(x)
),
}; };
this._issues.set(N.toRaw(address), entry); this._issues.set(N.toRaw(address), entry);
return address; return address;
} }
_addCommit(json: CommitJSON): GitNode.CommitAddress { _addCommit(json: T.Commit): GitNode.CommitAddress {
const address = {type: GitNode.COMMIT_TYPE, hash: json.oid}; const address = {type: GitNode.COMMIT_TYPE, hash: json.oid};
const rawAddress = N.toRaw(address);
// This fast-return is critical, because a single commit may appear
// many times in the repository.
//
// For example, a repository with 1000 commits, each of which was
// created by merging a pull request, will include 1001 structural
// references to the root commit (one via the HEAD ref, and one
// along a parent-chain from each pull request), so the number of
// occurrences of commits can easily be quadratic in the number of
// actual commits.
//
// Furthermore, it can be exponential: consider merge commits like
//
// HEAD
// / \
// 1A 1B
// | X |
// 2A 2B
// | X |
// 3A 3B
// \ /
// root
//
// where each merge commit has two parents. The root commit is
// reachable along about 2^(n/2) paths.
//
// This check ensures that we process each commit edge only once.
if (this._commits.has(rawAddress)) {
return address;
}
const authors = const authors =
json.author == null ? [] : this._addNullableAuthor(json.author.user); json.author == null ? [] : this._addNullableAuthor(json.author.user);
const entry: CommitEntry = { const entry: CommitEntry = {
@ -357,10 +396,15 @@ export class RelationalView {
message: json.message, message: json.message,
}; };
this._commits.set(N.toRaw(address), entry); this._commits.set(N.toRaw(address), entry);
for (const parent of json.parents) {
if (parent != null) {
this._addCommit(parent);
}
}
return address; return address;
} }
_addPull(repo: RepoAddress, json: PullJSON): PullAddress { _addPull(repo: RepoAddress, json: T.PullRequest): PullAddress {
const address: PullAddress = { const address: PullAddress = {
type: N.PULL_TYPE, type: N.PULL_TYPE,
number: String(json.number), number: String(json.number),
@ -372,21 +416,27 @@ export class RelationalView {
const entry: PullEntry = { const entry: PullEntry = {
address, address,
url: json.url, url: json.url,
comments: json.comments.nodes.map((x) => this._addComment(address, x)), comments: expectAllNonNull(json, "comments", json.comments).map((x) =>
reviews: json.reviews.nodes.map((x) => this._addReview(address, x)), this._addComment(address, x)
),
reviews: expectAllNonNull(json, "reviews", json.reviews).map((x) =>
this._addReview(address, x)
),
authors: this._addNullableAuthor(json.author), authors: this._addNullableAuthor(json.author),
body: json.body, body: json.body,
title: json.title, title: json.title,
mergedAs, mergedAs,
additions: json.additions, additions: json.additions,
deletions: json.deletions, deletions: json.deletions,
reactions: json.reactions.nodes.map((x) => this._addReaction(x)), reactions: expectAllNonNull(json, "reactions", json.reactions).map((x) =>
this._addReaction(x)
),
}; };
this._pulls.set(N.toRaw(address), entry); this._pulls.set(N.toRaw(address), entry);
return address; return address;
} }
_addReview(pull: PullAddress, json: ReviewJSON): ReviewAddress { _addReview(pull: PullAddress, json: T.PullRequestReview): ReviewAddress {
const address: ReviewAddress = { const address: ReviewAddress = {
type: N.REVIEW_TYPE, type: N.REVIEW_TYPE,
id: reviewUrlToId(json.url), id: reviewUrlToId(json.url),
@ -396,7 +446,9 @@ export class RelationalView {
address, address,
url: json.url, url: json.url,
state: json.state, state: json.state,
comments: json.comments.nodes.map((x) => this._addComment(address, x)), comments: expectAllNonNull(json, "comments", json.comments).map((x) =>
this._addComment(address, x)
),
body: json.body, body: json.body,
authors: this._addNullableAuthor(json.author), authors: this._addNullableAuthor(json.author),
}; };
@ -406,7 +458,7 @@ export class RelationalView {
_addComment( _addComment(
parent: IssueAddress | PullAddress | ReviewAddress, parent: IssueAddress | PullAddress | ReviewAddress,
json: CommentJSON json: T.IssueComment | T.PullRequestReviewComment
): CommentAddress { ): CommentAddress {
const id = (function() { const id = (function() {
switch (parent.type) { switch (parent.type) {
@ -428,13 +480,15 @@ export class RelationalView {
url: json.url, url: json.url,
authors: this._addNullableAuthor(json.author), authors: this._addNullableAuthor(json.author),
body: json.body, body: json.body,
reactions: json.reactions.nodes.map((x) => this._addReaction(x)), reactions: expectAllNonNull(json, "reactions", json.reactions).map((x) =>
this._addReaction(x)
),
}; };
this._comments.set(N.toRaw(address), entry); this._comments.set(N.toRaw(address), entry);
return address; return address;
} }
_addReaction(json: ReactionJSON): ReactionRecord { _addReaction(json: T.Reaction): ReactionRecord {
const authorAddresses = this._addNullableAuthor(json.user); const authorAddresses = this._addNullableAuthor(json.user);
if (authorAddresses.length !== 1) { if (authorAddresses.length !== 1) {
throw new Error( throw new Error(
@ -444,7 +498,7 @@ export class RelationalView {
return {content: json.content, user: authorAddresses[0]}; return {content: json.content, user: authorAddresses[0]};
} }
_addNullableAuthor(json: NullableAuthorJSON): UserlikeAddress[] { _addNullableAuthor(json: null | T.Actor): UserlikeAddress[] {
if (json == null) { if (json == null) {
return []; return [];
} else { } else {
@ -639,7 +693,7 @@ export class RelationalView {
} }
type ReactionRecord = {| type ReactionRecord = {|
+content: ReactionContent, +content: T.ReactionContent,
+user: UserlikeAddress, +user: UserlikeAddress,
|}; |};
@ -824,7 +878,7 @@ type ReviewEntry = {|
+body: string, +body: string,
+url: string, +url: string,
+comments: CommentAddress[], +comments: CommentAddress[],
+state: ReviewState, +state: T.PullRequestReviewState,
+authors: UserlikeAddress[], +authors: UserlikeAddress[],
|}; |};
@ -1002,6 +1056,33 @@ export function match<T>(handlers: MatchHandlers<T>, x: Entity): T {
throw new Error(`Unexpected entity ${x}`); throw new Error(`Unexpected entity ${x}`);
} }
/**
* Invoke this function when the GitHub GraphQL schema docs indicate
* that a connection provides a list of _nullable_ nodes, but we expect
* them all to always be non-null.
*
* This will drop any `null` elements from the provided list, issuing a
* warning to stderr if `null`s are found.
*/
export function expectAllNonNull<T>(
context: {+__typename: string, +id: string},
fieldname: string,
xs: $ReadOnlyArray<null | T>
): Array<T> {
const result = [];
for (const x of xs) {
if (x !== null) {
result.push(x);
} else {
console.warn(
`${context.__typename}[${context.id}].${fieldname}: ` +
"unexpected null value"
);
}
}
return result;
}
export type Entity = Repo | Issue | Pull | Review | Comment | Commit | Userlike; export type Entity = Repo | Issue | Pull | Review | Comment | Commit | Userlike;
export type AuthoredEntity = Issue | Pull | Review | Comment | Commit; export type AuthoredEntity = Issue | Pull | Review | Comment | Commit;
export type TextContentEntity = Issue | Pull | Review | Comment | Commit; export type TextContentEntity = Issue | Pull | Review | Comment | Commit;

View File

@ -355,4 +355,29 @@ describe("plugins/github/relationalView", () => {
expect(json1).toEqual(json2); expect(json1).toEqual(json2);
}); });
}); });
describe("expectAllNonNull", () => {
it("returns non-null elements unscathed, with no warnings", () => {
jest.spyOn(console, "warn").mockImplementation(() => {});
jest.spyOn(console, "error").mockImplementation(() => {});
const o = {__typename: "X", id: "x", xs: [1, 2]};
expect(R.expectAllNonNull(o, "xs", o.xs)).toEqual([1, 2]);
expect(console.warn).not.toHaveBeenCalled();
expect(console.error).not.toHaveBeenCalled();
});
it("filters out nulls, issuing a warning", () => {
jest.spyOn(console, "warn").mockImplementation(() => {});
jest.spyOn(console, "error").mockImplementation(() => {});
const o = {__typename: "X", id: "x", xs: [1, null, 3, null]};
expect(R.expectAllNonNull(o, "xs", o.xs)).toEqual([1, 3]);
expect(console.warn).toHaveBeenCalledTimes(2);
expect(console.warn).toHaveBeenCalledWith(
"X[x].xs: unexpected null value"
);
expect(console.error).not.toHaveBeenCalled();
});
});
}); });