relationalView: use new data format (#934)
Summary: This makes significant progress toward #923. As of this commit, it is possible to use the Mirror module for the whole loading pipeline. This process may be slow for repositories that do not use pull requests at all (more precisely, that have large connected commit subgraphs none of whose nodes is the merge commit of a pull request; see #920 for details) so it is not yet the default codepath. Test Plan: Existing unit tests should suffice. For extra testing, I’ve added a script that fetches a repository both via the old continuations logic and the new Mirror logic, then constructs relational views and checks whether the data is the same. For `example-github`, the views are identical. For `sourcecred`, they are not: the old continuations logic erroneously omits two commits, which the Mirror logic includes. You can run the test like this: ``` $ node ./bin/testContinuations.js \ > sourcecred sourcecred MDEwOlJlcG9zaXRvcnkxMjAxNDU1NzA= \ > /tmp/continuations.json /tmp/mirror.json \ > 2> >(jq . >&2) { "child": "0d38dde23a6de831315f3643a7d2bc15e8df7678", "parent": "cb8ba0eaa1abc1f921e7165bb19e29b40723ce65", "type": "UNKNOWN_PARENT_OID" } { "child": "d152f48ce4c2ed1d046bf6ed4f139e7e393ea660", "parent": "de7a8723963d9cd0437ef34f5942a071b850c0e7", "type": "UNKNOWN_PARENT_OID" } Different. Saving to disk... ``` Use `diff -u <(jq . /tmp/continuations.json) <(jq . /tmp/mirror.json)` to inspect the differences, and note that exactly the two missing commits have been added and that there are no other changes. (The diff is small: just 51 lines of nicely formatted JSON.) The full log is here: <https://gist.github.com/wchargin/e159cac9dcf3cc3b1efbd54f59e24e0b> I also generated the `sourcecred/sourcecred` cred attribution and viewed it with `yarn start`, which seems to work fine. wchargin-branch: relationalview-new-data-format
This commit is contained in:
parent
58e98124ac
commit
e2c99c418b
|
@ -29,6 +29,9 @@ module.exports = {
|
|||
backendEntryPoints: {
|
||||
sourcecred: resolveApp("src/cli/main.js"),
|
||||
//
|
||||
testContinuations: resolveApp(
|
||||
"src/plugins/github/bin/testContinuations.js"
|
||||
),
|
||||
generateGithubGraphqlFlowTypes: resolveApp(
|
||||
"src/plugins/github/bin/generateGraphqlFlowTypes.js"
|
||||
),
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
// @flow
|
||||
// Ad hoc testing script for RelationalView input format consistency.
|
||||
|
||||
import Database from "better-sqlite3";
|
||||
import fs from "fs-extra";
|
||||
import stringify from "json-stable-stringify";
|
||||
import deepEqual from "lodash.isequal";
|
||||
|
||||
import {makeRepoId} from "../../../core/repoId";
|
||||
import {Mirror} from "../../../graphql/mirror";
|
||||
import fetchGithubRepo, {postQuery} from "../fetchGithubRepo";
|
||||
import type {Repository} from "../graphqlTypes";
|
||||
import {RelationalView, type RelationalViewJSON} from "../relationalView";
|
||||
import githubSchema from "../schema";
|
||||
|
||||
async function test(options: {|
|
||||
+token: string,
|
||||
+owner: string,
|
||||
+name: string,
|
||||
+graphqlId: string,
|
||||
+outputFilepaths: {|
|
||||
+continuations: string,
|
||||
+mirror: string,
|
||||
|},
|
||||
|}) {
|
||||
async function fetchViaContinuations(): Promise<RelationalViewJSON> {
|
||||
const raw = await fetchGithubRepo(
|
||||
makeRepoId(options.owner, options.name),
|
||||
options.token
|
||||
);
|
||||
const rv = new RelationalView();
|
||||
rv.addData(raw);
|
||||
return rv.toJSON();
|
||||
}
|
||||
|
||||
async function fetchViaMirror(): Promise<RelationalViewJSON> {
|
||||
const mirror = new Mirror(new Database(":memory:"), githubSchema());
|
||||
mirror.registerObject({typename: "Repository", id: options.graphqlId});
|
||||
await mirror.update((payload) => postQuery(payload, options.token), {
|
||||
nodesLimit: 100,
|
||||
nodesOfTypeLimit: 100,
|
||||
connectionPageSize: 100,
|
||||
connectionLimit: 100,
|
||||
since: new Date(0),
|
||||
now: () => new Date(),
|
||||
});
|
||||
const repository = ((mirror.extract(options.graphqlId): any): Repository);
|
||||
const rv = new RelationalView();
|
||||
rv.addRepository(repository);
|
||||
return rv.toJSON();
|
||||
}
|
||||
|
||||
function saveTo(filename: string, repo: RelationalViewJSON): Promise<void> {
|
||||
return fs.writeFile(filename, stringify(repo));
|
||||
}
|
||||
|
||||
const [viaContinuations, viaMirror] = await Promise.all([
|
||||
fetchViaContinuations(),
|
||||
fetchViaMirror(),
|
||||
]);
|
||||
|
||||
if (deepEqual(viaContinuations, viaMirror)) {
|
||||
console.log("Identical. Saving to disk...");
|
||||
} else {
|
||||
console.log("Different. Saving to disk...");
|
||||
}
|
||||
|
||||
await Promise.all([
|
||||
saveTo(options.outputFilepaths.continuations, viaContinuations),
|
||||
saveTo(options.outputFilepaths.mirror, viaMirror),
|
||||
]);
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const args = process.argv.slice(2);
|
||||
const token = process.env.SOURCECRED_GITHUB_TOKEN;
|
||||
if (args.length !== 5 || token == null) {
|
||||
const invocation = [
|
||||
"SOURCECRED_GITHUB_TOKEN=<token>",
|
||||
"node",
|
||||
"test.js",
|
||||
"REPO_OWNER",
|
||||
"REPO_NAME",
|
||||
"GRAPHQL_ID",
|
||||
"CONTINUATIONS_OUTPUT_FILENAME",
|
||||
"MIRROR_OUTPUT_FILENAME",
|
||||
];
|
||||
console.error("usage: " + invocation.join(" "));
|
||||
process.exitCode = 1;
|
||||
return;
|
||||
}
|
||||
const [owner, name, graphqlId, continuations, mirror] = args;
|
||||
const options = {
|
||||
token,
|
||||
owner,
|
||||
name,
|
||||
graphqlId,
|
||||
outputFilepaths: {
|
||||
continuations,
|
||||
mirror,
|
||||
},
|
||||
};
|
||||
await test(options);
|
||||
}
|
||||
|
||||
main();
|
|
@ -15,23 +15,15 @@ import type {
|
|||
CommentAddress,
|
||||
UserlikeAddress,
|
||||
} from "./nodes";
|
||||
import type {
|
||||
GithubResponseJSON,
|
||||
RepositoryJSON,
|
||||
IssueJSON,
|
||||
PullJSON,
|
||||
ReviewJSON,
|
||||
CommentJSON,
|
||||
CommitJSON,
|
||||
NullableAuthorJSON,
|
||||
ReviewState,
|
||||
ReactionJSON,
|
||||
ReactionContent,
|
||||
} from "./graphql";
|
||||
import * as T from "./graphqlTypes";
|
||||
import type {GithubResponseJSON} from "./graphql";
|
||||
import * as GitNode from "../git/nodes";
|
||||
import * as MapUtil from "../../util/map";
|
||||
import * as NullUtil from "../../util/null";
|
||||
import {botSet} from "./bots";
|
||||
|
||||
import translateContinuations from "./translateContinuations";
|
||||
|
||||
import {
|
||||
reviewUrlToId,
|
||||
issueCommentUrlToId,
|
||||
|
@ -72,7 +64,15 @@ export class RelationalView {
|
|||
// state. for example, if called with {repo: {issues: [1,2,3]}} and then with
|
||||
// {repo: {issues: [4, 5]}}, then calls to repo.issues() will only give back
|
||||
// issues 4 and 5 (although issues 1, 2, and 3 will still be in the view)
|
||||
this._addRepo(data.repository);
|
||||
const {result: repository, warnings} = translateContinuations(data);
|
||||
for (const warning of warnings) {
|
||||
console.warn(stringify(warning));
|
||||
}
|
||||
this.addRepository(repository);
|
||||
}
|
||||
|
||||
addRepository(repository: T.Repository): void {
|
||||
this._addRepo(repository);
|
||||
this._addReferences();
|
||||
}
|
||||
|
||||
|
@ -293,29 +293,33 @@ export class RelationalView {
|
|||
return rv;
|
||||
}
|
||||
|
||||
_addRepo(json: RepositoryJSON) {
|
||||
_addRepo(json: T.Repository) {
|
||||
const address: RepoAddress = {
|
||||
type: N.REPO_TYPE,
|
||||
owner: json.owner.login,
|
||||
owner: NullUtil.get(json.owner).login,
|
||||
name: json.name,
|
||||
};
|
||||
const entry: RepoEntry = {
|
||||
address,
|
||||
url: json.url,
|
||||
issues: json.issues.nodes.map((x) => this._addIssue(address, x)),
|
||||
pulls: json.pulls.nodes.map((x) => this._addPull(address, x)),
|
||||
issues: expectAllNonNull(json, "issues", json.issues).map((x) =>
|
||||
this._addIssue(address, x)
|
||||
),
|
||||
pulls: expectAllNonNull(json, "pullRequests", json.pullRequests).map(
|
||||
(x) => this._addPull(address, x)
|
||||
),
|
||||
};
|
||||
const raw = N.toRaw(address);
|
||||
this._repos.set(raw, entry);
|
||||
this._addCommitHistory(json);
|
||||
}
|
||||
|
||||
_addCommitHistory(json: RepositoryJSON) {
|
||||
_addCommitHistory(json: T.Repository) {
|
||||
if (json.defaultBranchRef != null) {
|
||||
const target = json.defaultBranchRef.target;
|
||||
if (target.__typename === "Commit") {
|
||||
target.history.nodes.forEach((commit) => this._addCommit(commit));
|
||||
} else {
|
||||
if (target != null && target.__typename === "Commit") {
|
||||
this._addCommit(target);
|
||||
} else if (target != null) {
|
||||
throw new Error(
|
||||
dedent`\
|
||||
Your repo doesn't have a commit as its defaultBranchRef's target. \
|
||||
|
@ -327,7 +331,7 @@ export class RelationalView {
|
|||
}
|
||||
}
|
||||
|
||||
_addIssue(repo: RepoAddress, json: IssueJSON): IssueAddress {
|
||||
_addIssue(repo: RepoAddress, json: T.Issue): IssueAddress {
|
||||
const address: IssueAddress = {
|
||||
type: N.ISSUE_TYPE,
|
||||
number: String(json.number),
|
||||
|
@ -336,18 +340,53 @@ export class RelationalView {
|
|||
const entry: IssueEntry = {
|
||||
address,
|
||||
url: json.url,
|
||||
comments: json.comments.nodes.map((x) => this._addComment(address, x)),
|
||||
comments: expectAllNonNull(json, "comments", json.comments).map((x) =>
|
||||
this._addComment(address, x)
|
||||
),
|
||||
authors: this._addNullableAuthor(json.author),
|
||||
body: json.body,
|
||||
title: json.title,
|
||||
reactions: json.reactions.nodes.map((x) => this._addReaction(x)),
|
||||
reactions: expectAllNonNull(json, "reactions", json.reactions).map((x) =>
|
||||
this._addReaction(x)
|
||||
),
|
||||
};
|
||||
this._issues.set(N.toRaw(address), entry);
|
||||
return address;
|
||||
}
|
||||
|
||||
_addCommit(json: CommitJSON): GitNode.CommitAddress {
|
||||
_addCommit(json: T.Commit): GitNode.CommitAddress {
|
||||
const address = {type: GitNode.COMMIT_TYPE, hash: json.oid};
|
||||
const rawAddress = N.toRaw(address);
|
||||
// This fast-return is critical, because a single commit may appear
|
||||
// many times in the repository.
|
||||
//
|
||||
// For example, a repository with 1000 commits, each of which was
|
||||
// created by merging a pull request, will include 1001 structural
|
||||
// references to the root commit (one via the HEAD ref, and one
|
||||
// along a parent-chain from each pull request), so the number of
|
||||
// occurrences of commits can easily be quadratic in the number of
|
||||
// actual commits.
|
||||
//
|
||||
// Furthermore, it can be exponential: consider merge commits like
|
||||
//
|
||||
// HEAD
|
||||
// / \
|
||||
// 1A 1B
|
||||
// | X |
|
||||
// 2A 2B
|
||||
// | X |
|
||||
// 3A 3B
|
||||
// \ /
|
||||
// root
|
||||
//
|
||||
// where each merge commit has two parents. The root commit is
|
||||
// reachable along about 2^(n/2) paths.
|
||||
//
|
||||
// This check ensures that we process each commit edge only once.
|
||||
if (this._commits.has(rawAddress)) {
|
||||
return address;
|
||||
}
|
||||
|
||||
const authors =
|
||||
json.author == null ? [] : this._addNullableAuthor(json.author.user);
|
||||
const entry: CommitEntry = {
|
||||
|
@ -357,10 +396,15 @@ export class RelationalView {
|
|||
message: json.message,
|
||||
};
|
||||
this._commits.set(N.toRaw(address), entry);
|
||||
for (const parent of json.parents) {
|
||||
if (parent != null) {
|
||||
this._addCommit(parent);
|
||||
}
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
||||
_addPull(repo: RepoAddress, json: PullJSON): PullAddress {
|
||||
_addPull(repo: RepoAddress, json: T.PullRequest): PullAddress {
|
||||
const address: PullAddress = {
|
||||
type: N.PULL_TYPE,
|
||||
number: String(json.number),
|
||||
|
@ -372,21 +416,27 @@ export class RelationalView {
|
|||
const entry: PullEntry = {
|
||||
address,
|
||||
url: json.url,
|
||||
comments: json.comments.nodes.map((x) => this._addComment(address, x)),
|
||||
reviews: json.reviews.nodes.map((x) => this._addReview(address, x)),
|
||||
comments: expectAllNonNull(json, "comments", json.comments).map((x) =>
|
||||
this._addComment(address, x)
|
||||
),
|
||||
reviews: expectAllNonNull(json, "reviews", json.reviews).map((x) =>
|
||||
this._addReview(address, x)
|
||||
),
|
||||
authors: this._addNullableAuthor(json.author),
|
||||
body: json.body,
|
||||
title: json.title,
|
||||
mergedAs,
|
||||
additions: json.additions,
|
||||
deletions: json.deletions,
|
||||
reactions: json.reactions.nodes.map((x) => this._addReaction(x)),
|
||||
reactions: expectAllNonNull(json, "reactions", json.reactions).map((x) =>
|
||||
this._addReaction(x)
|
||||
),
|
||||
};
|
||||
this._pulls.set(N.toRaw(address), entry);
|
||||
return address;
|
||||
}
|
||||
|
||||
_addReview(pull: PullAddress, json: ReviewJSON): ReviewAddress {
|
||||
_addReview(pull: PullAddress, json: T.PullRequestReview): ReviewAddress {
|
||||
const address: ReviewAddress = {
|
||||
type: N.REVIEW_TYPE,
|
||||
id: reviewUrlToId(json.url),
|
||||
|
@ -396,7 +446,9 @@ export class RelationalView {
|
|||
address,
|
||||
url: json.url,
|
||||
state: json.state,
|
||||
comments: json.comments.nodes.map((x) => this._addComment(address, x)),
|
||||
comments: expectAllNonNull(json, "comments", json.comments).map((x) =>
|
||||
this._addComment(address, x)
|
||||
),
|
||||
body: json.body,
|
||||
authors: this._addNullableAuthor(json.author),
|
||||
};
|
||||
|
@ -406,7 +458,7 @@ export class RelationalView {
|
|||
|
||||
_addComment(
|
||||
parent: IssueAddress | PullAddress | ReviewAddress,
|
||||
json: CommentJSON
|
||||
json: T.IssueComment | T.PullRequestReviewComment
|
||||
): CommentAddress {
|
||||
const id = (function() {
|
||||
switch (parent.type) {
|
||||
|
@ -428,13 +480,15 @@ export class RelationalView {
|
|||
url: json.url,
|
||||
authors: this._addNullableAuthor(json.author),
|
||||
body: json.body,
|
||||
reactions: json.reactions.nodes.map((x) => this._addReaction(x)),
|
||||
reactions: expectAllNonNull(json, "reactions", json.reactions).map((x) =>
|
||||
this._addReaction(x)
|
||||
),
|
||||
};
|
||||
this._comments.set(N.toRaw(address), entry);
|
||||
return address;
|
||||
}
|
||||
|
||||
_addReaction(json: ReactionJSON): ReactionRecord {
|
||||
_addReaction(json: T.Reaction): ReactionRecord {
|
||||
const authorAddresses = this._addNullableAuthor(json.user);
|
||||
if (authorAddresses.length !== 1) {
|
||||
throw new Error(
|
||||
|
@ -444,7 +498,7 @@ export class RelationalView {
|
|||
return {content: json.content, user: authorAddresses[0]};
|
||||
}
|
||||
|
||||
_addNullableAuthor(json: NullableAuthorJSON): UserlikeAddress[] {
|
||||
_addNullableAuthor(json: null | T.Actor): UserlikeAddress[] {
|
||||
if (json == null) {
|
||||
return [];
|
||||
} else {
|
||||
|
@ -639,7 +693,7 @@ export class RelationalView {
|
|||
}
|
||||
|
||||
type ReactionRecord = {|
|
||||
+content: ReactionContent,
|
||||
+content: T.ReactionContent,
|
||||
+user: UserlikeAddress,
|
||||
|};
|
||||
|
||||
|
@ -824,7 +878,7 @@ type ReviewEntry = {|
|
|||
+body: string,
|
||||
+url: string,
|
||||
+comments: CommentAddress[],
|
||||
+state: ReviewState,
|
||||
+state: T.PullRequestReviewState,
|
||||
+authors: UserlikeAddress[],
|
||||
|};
|
||||
|
||||
|
@ -1002,6 +1056,33 @@ export function match<T>(handlers: MatchHandlers<T>, x: Entity): T {
|
|||
throw new Error(`Unexpected entity ${x}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Invoke this function when the GitHub GraphQL schema docs indicate
|
||||
* that a connection provides a list of _nullable_ nodes, but we expect
|
||||
* them all to always be non-null.
|
||||
*
|
||||
* This will drop any `null` elements from the provided list, issuing a
|
||||
* warning to stderr if `null`s are found.
|
||||
*/
|
||||
export function expectAllNonNull<T>(
|
||||
context: {+__typename: string, +id: string},
|
||||
fieldname: string,
|
||||
xs: $ReadOnlyArray<null | T>
|
||||
): Array<T> {
|
||||
const result = [];
|
||||
for (const x of xs) {
|
||||
if (x !== null) {
|
||||
result.push(x);
|
||||
} else {
|
||||
console.warn(
|
||||
`${context.__typename}[${context.id}].${fieldname}: ` +
|
||||
"unexpected null value"
|
||||
);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
export type Entity = Repo | Issue | Pull | Review | Comment | Commit | Userlike;
|
||||
export type AuthoredEntity = Issue | Pull | Review | Comment | Commit;
|
||||
export type TextContentEntity = Issue | Pull | Review | Comment | Commit;
|
||||
|
|
|
@ -355,4 +355,29 @@ describe("plugins/github/relationalView", () => {
|
|||
expect(json1).toEqual(json2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("expectAllNonNull", () => {
|
||||
it("returns non-null elements unscathed, with no warnings", () => {
|
||||
jest.spyOn(console, "warn").mockImplementation(() => {});
|
||||
jest.spyOn(console, "error").mockImplementation(() => {});
|
||||
|
||||
const o = {__typename: "X", id: "x", xs: [1, 2]};
|
||||
expect(R.expectAllNonNull(o, "xs", o.xs)).toEqual([1, 2]);
|
||||
expect(console.warn).not.toHaveBeenCalled();
|
||||
expect(console.error).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("filters out nulls, issuing a warning", () => {
|
||||
jest.spyOn(console, "warn").mockImplementation(() => {});
|
||||
jest.spyOn(console, "error").mockImplementation(() => {});
|
||||
|
||||
const o = {__typename: "X", id: "x", xs: [1, null, 3, null]};
|
||||
expect(R.expectAllNonNull(o, "xs", o.xs)).toEqual([1, 3]);
|
||||
expect(console.warn).toHaveBeenCalledTimes(2);
|
||||
expect(console.warn).toHaveBeenCalledWith(
|
||||
"X[x].xs: unexpected null value"
|
||||
);
|
||||
expect(console.error).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue