github: translate old format to structured format (#930)
Summary: This implements the translation module described in #923. See that issue for context. Test Plan: This is a mostly straightforward translation from one strongly typed data structure to another, so Flow handles most of it. As a check on the snapshot, run: ``` $ grep -e oid -e target -e mergeCommit \ > src/plugins/github/__snapshots__/translateContinuations.test.js.snap "target": Object { "oid": "6bd1b4c0b719c22c688a74863be07a699b7b9b34", "oid": "c430bd74455105f77215ece51945094ceeee6c86", "oid": "6d5b3aa31ebb68a06ceb46bbd6cf49b6ccd6f5e6", "oid": "0a223346b4e6dec0127b1e6aa892c4ee0424b66a", "oid": "ec91adb718a6045b492303f00d8e8beb957dc780", "oid": "ecc889dc94cf6da17ae6eab5bb7b7155f577519d", "oid": "ec91adb718a6045b492303f00d8e8beb957dc780", "mergeCommit": Object { "oid": "0a223346b4e6dec0127b1e6aa892c4ee0424b66a", "oid": "ec91adb718a6045b492303f00d8e8beb957dc780", "oid": "ecc889dc94cf6da17ae6eab5bb7b7155f577519d", "oid": "ec91adb718a6045b492303f00d8e8beb957dc780", "mergeCommit": Object { "oid": "6d5b3aa31ebb68a06ceb46bbd6cf49b6ccd6f5e6", "oid": "0a223346b4e6dec0127b1e6aa892c4ee0424b66a", "oid": "ec91adb718a6045b492303f00d8e8beb957dc780", "oid": "ecc889dc94cf6da17ae6eab5bb7b7155f577519d", "oid": "ec91adb718a6045b492303f00d8e8beb957dc780", "mergeCommit": null, ``` Cross-check this against [the example-github commits][commits] thus: - Note that commit `6bd1b4c` is the head commit, and is thus the root commit of the `target` chain. - Note that commits `0a22334` and `6d5b3aa`, which were merged via pull request, appear twice each: once in the history from head, and once as the merge commit of a pull request. - Note that commit `0a22334` has two parents at each occurrence. - Note that the unmerged pull request’s merge commit is `null`. 
[commits]: https://github.com/sourcecred/example-github/commits/master To run this on real-world data, apply the following patch: ```diff diff --git a/src/plugins/github/fetchGithubRepo.js b/src/plugins/github/fetchGithubRepo.js index 6ac201af..b14ca760 100644 --- a/src/plugins/github/fetchGithubRepo.js +++ b/src/plugins/github/fetchGithubRepo.js @@ -11,6 +11,7 @@ import {stringify, inlineLayout, type Body} from "../../graphql/queries"; import {createQuery, createVariables, postQueryExhaustive} from "./graphql"; import type {GithubResponseJSON} from "./graphql"; import type {RepoId} from "../../core/repoId"; +import translateContinuations from "./translateContinuations"; /** * Scrape data from a GitHub repo using the GitHub API. @@ -44,6 +45,11 @@ export default function fetchGithubRepo( payload ).then((x: GithubResponseJSON) => { ensureNoMorePages(x); + console.warn("Translating continuations..."); + for (const w of translateContinuations(x).warnings) { + console.warn(w); + } + console.warn("Done."); return x; }); } ``` Then run: ``` $ yarn backend >/dev/null 2>/dev/null; echo $? 0 $ node ./bin/sourcecred.js load sourcecred/sourcecred --plugin github 2>&1 | > ts -s '%.s' 55.015740 Translating continuations... 55.037217 { type: 'UNKNOWN_PARENT_OID', 55.037273 child: '0d38dde23a6de831315f3643a7d2bc15e8df7678', 55.037290 parent: 'cb8ba0eaa1abc1f921e7165bb19e29b40723ce65' } 55.037309 { type: 'UNKNOWN_PARENT_OID', 55.037336 child: 'd152f48ce4c2ed1d046bf6ed4f139e7e393ea660', 55.037359 parent: 'de7a8723963d9cd0437ef34f5942a071b850c0e7' } 55.037383 Done. ``` Note that the two commits in question were each merged into a non-master branch, in #28 and #329 respectively. Note also that translating these continuations took just 22 milliseconds. wchargin-branch: github-translate-continuations
This commit is contained in:
parent
6499df6b6b
commit
993de9303a
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,331 @@
|
|||
// @flow
|
||||
// Temporary module to translate GraphQL results from the old format
|
||||
// with manually resolved continuations to the format emitted by the
|
||||
// Mirror module. See issue #923 for context.
|
||||
|
||||
import type {
|
||||
AuthorJSON,
|
||||
BotJSON,
|
||||
CommentJSON,
|
||||
CommitJSON,
|
||||
GitObjectJSON,
|
||||
GithubResponseJSON,
|
||||
IssueJSON,
|
||||
OrganizationJSON,
|
||||
PullJSON,
|
||||
ReactionJSON,
|
||||
RefJSON,
|
||||
RepositoryJSON,
|
||||
ReviewCommentJSON,
|
||||
ReviewJSON,
|
||||
UserJSON,
|
||||
} from "./graphql";
|
||||
import type {
|
||||
Actor,
|
||||
Blob,
|
||||
Bot,
|
||||
Commit,
|
||||
GitObject,
|
||||
GitObjectID,
|
||||
Issue,
|
||||
IssueComment,
|
||||
Organization,
|
||||
PullRequest,
|
||||
PullRequestReview,
|
||||
PullRequestReviewComment,
|
||||
Reaction,
|
||||
Ref,
|
||||
Repository,
|
||||
RepositoryOwner,
|
||||
Tag,
|
||||
Tree,
|
||||
User,
|
||||
} from "./graphqlTypes";
|
||||
|
||||
// Diagnostics that `translate` collects while translating and returns
// in the `warnings` field of its result. Each variant is tagged by its
// `type` field.
export type Warning =
|
||||
// Emitted when the target of the repository's default branch ref is
// not a commit (i.e., it is a tree, blob, or tag).
// We've never seen it happen, and don't know how it could. But the
|
||||
// GitHub schema says that it can. This warning is more of a
|
||||
// diagnostic to the SourceCred maintainers (if it comes up on a real
|
||||
// repository, we can learn something!) than an indication that
|
||||
// something has gone wrong.
|
||||
| {|+type: "NON_COMMIT_REF_TARGET", +target: GitObjectJSON|}
|
||||
// This can happen if a commit has a parent that we did not fetch. We
|
||||
// only fetch commits that are Git-reachable from HEAD or are the direct
|
||||
// merge commit of a pull request. We may therefore omit commits that
|
||||
// disappeared from master after a force-push, or were an ancestor of a
|
||||
// pull request that was merged into a branch other than master. See
|
||||
// issue #923 for more context. If this is emitted, we will simply
|
||||
// omit the offending parent commit from the child's `parents` list.
|
||||
| {|+type: "UNKNOWN_PARENT_OID", +child: GitObjectID, +parent: GitObjectID|};
|
||||
|
||||
export default function translate(
|
||||
json: GithubResponseJSON
|
||||
): {|
|
||||
+result: Repository,
|
||||
+warnings: $ReadOnlyArray<Warning>,
|
||||
|} {
|
||||
const repositoryJson = json.repository;
|
||||
const warnings: Array<Warning> = [];
|
||||
|
||||
// Most of the work that this function does is exploding connections
|
||||
// into lists of nodes. But commits require some special attention,
|
||||
// because we have to resolve parent OIDs to actual parent commits.
|
||||
// This means that it is most convenient to start by discovering all
|
||||
// commits in the data.
|
||||
const commits: Map<
|
||||
GitObjectID,
|
||||
{|
|
||||
...Commit,
|
||||
parents: Array<null | Commit>, // mutable: we build this incrementally
|
||||
|}
|
||||
> = new Map();
|
||||
|
||||
// First, create all the commit objects, initializing them with empty
|
||||
// parent arrays. We put these temporarily into a map keyed by OID for
|
||||
// deduplication: a commit may appear both in the linearized history
|
||||
// from HEAD and also as the merge commit of a pull request, and we
|
||||
// want to process it just once.
|
||||
const commitJsons: $ReadOnlyArray<CommitJSON> = Array.from(
|
||||
new Map(
|
||||
Array.from(
|
||||
(function*() {
|
||||
if (repositoryJson.defaultBranchRef) {
|
||||
const target = repositoryJson.defaultBranchRef.target;
|
||||
switch (target.__typename) {
|
||||
case "Commit":
|
||||
yield* target.history.nodes;
|
||||
break;
|
||||
case "Tree":
|
||||
case "Blob":
|
||||
case "Tag":
|
||||
warnings.push({type: "NON_COMMIT_REF_TARGET", target});
|
||||
break;
|
||||
// istanbul ignore next: unreachable per Flow
|
||||
default:
|
||||
throw new Error((target.type: empty));
|
||||
}
|
||||
}
|
||||
for (const pull of repositoryJson.pulls.nodes) {
|
||||
if (pull.mergeCommit) {
|
||||
yield pull.mergeCommit;
|
||||
}
|
||||
}
|
||||
})()
|
||||
).map((json) => [json.oid, json])
|
||||
).values()
|
||||
);
|
||||
for (const commitJson of commitJsons) {
|
||||
const commit = {
|
||||
__typename: "Commit",
|
||||
author: {...commitJson.author},
|
||||
id: commitJson.id,
|
||||
message: commitJson.message,
|
||||
oid: commitJson.oid,
|
||||
parents: [],
|
||||
url: commitJson.url,
|
||||
};
|
||||
commits.set(commit.oid, commit);
|
||||
}
|
||||
|
||||
// Then, once all the objects have been created, we can set up the
|
||||
// parents.
|
||||
for (const commitJson of commitJsons) {
|
||||
const commit = commits.get(commitJson.oid);
|
||||
// istanbul ignore next: should not be possible
|
||||
if (commit == null) {
|
||||
throw new Error(
|
||||
"invariant violation: commit came out of nowhere: " + commitJson.oid
|
||||
);
|
||||
}
|
||||
for (const {oid: parentOid} of commitJson.parents.nodes) {
|
||||
const parentCommit = commits.get(parentOid);
|
||||
if (parentCommit == null) {
|
||||
warnings.push({
|
||||
type: "UNKNOWN_PARENT_OID",
|
||||
child: commitJson.oid,
|
||||
parent: parentOid,
|
||||
});
|
||||
} else {
|
||||
commit.parents.push(parentCommit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The rest is mostly mechanical. The pattern is: we pull off and
|
||||
// recursively translate the non-primitive fields of each object, and
|
||||
// then add a typename and put back the primitives. For union types,
|
||||
// we switch on the __typename and dispatch to the appropriate object
|
||||
// translators.
|
||||
|
||||
function translateRepository(json: RepositoryJSON): Repository {
|
||||
const {defaultBranchRef, issues, owner, pulls, ...rest} = json;
|
||||
return {
|
||||
__typename: "Repository",
|
||||
defaultBranchRef:
|
||||
defaultBranchRef == null
|
||||
? null
|
||||
: translateDefaultBranchRef(defaultBranchRef),
|
||||
issues: issues.nodes.map(translateIssue),
|
||||
owner: translateRepositoryOwner(owner),
|
||||
pullRequests: pulls.nodes.map(translatePullRequest),
|
||||
...rest,
|
||||
};
|
||||
}
|
||||
|
||||
function translateDefaultBranchRef(json: RefJSON): Ref {
|
||||
const {target, ...rest} = json;
|
||||
return {
|
||||
__typename: "Ref",
|
||||
target: translateDefaultBranchRefTarget(target),
|
||||
...rest,
|
||||
};
|
||||
}
|
||||
|
||||
// This one is a bit wonky, because our `GitObjectJSON` type is not a
|
||||
// good representation of the GitHub schema. In particular, a
|
||||
// `GitObjectJSON` can represent a commit, but in a different form
|
||||
// than our `CommitJSON`! This function _only_ applies to
|
||||
// `GitObjectJSON`s that we fetched as the `target` of the
|
||||
// `defaultBranchRef` of a repository. But these are the only
|
||||
// `GitObjectJSON`s that we fetch, so it's okay.
|
||||
function translateDefaultBranchRefTarget(json: GitObjectJSON): GitObject {
|
||||
switch (json.__typename) {
|
||||
case "Commit":
|
||||
// The default branch ref is `null` if there are no commits, so
|
||||
// the history must include at least one commit (the HEAD
|
||||
// commit).
|
||||
return lookUpCommit(json.history.nodes[0].oid);
|
||||
case "Blob":
|
||||
return ({...json}: Blob);
|
||||
case "Tag":
|
||||
return ({...json}: Tag);
|
||||
case "Tree":
|
||||
return ({...json}: Tree);
|
||||
// istanbul ignore next: unreachable per Flow
|
||||
default:
|
||||
throw new Error((json.__typename: empty));
|
||||
}
|
||||
}
|
||||
|
||||
function lookUpCommit(oid: GitObjectID): Commit {
|
||||
const commit = commits.get(oid);
|
||||
// istanbul ignore if: unreachable: we explored all commits in
|
||||
// the response, including this one.
|
||||
if (commit == null) {
|
||||
throw new Error("invariant violation: unknown commit: " + oid);
|
||||
}
|
||||
return commit;
|
||||
}
|
||||
|
||||
function translateCommit(json: CommitJSON): Commit {
|
||||
return lookUpCommit(json.oid);
|
||||
}
|
||||
|
||||
function translateIssue(json: IssueJSON): Issue {
|
||||
const {author, comments, reactions, ...rest} = json;
|
||||
return {
|
||||
__typename: "Issue",
|
||||
author: author == null ? null : translateActor(author),
|
||||
comments: comments.nodes.map(translateIssueComment),
|
||||
reactions: reactions.nodes.map(translateReaction),
|
||||
...rest,
|
||||
};
|
||||
}
|
||||
|
||||
function translateIssueComment(json: CommentJSON): IssueComment {
|
||||
const {author, reactions, ...rest} = json;
|
||||
return {
|
||||
__typename: "IssueComment",
|
||||
author: author == null ? null : translateActor(author),
|
||||
reactions: reactions.nodes.map(translateReaction),
|
||||
...rest,
|
||||
};
|
||||
}
|
||||
|
||||
function translateReaction(json: ReactionJSON): Reaction {
|
||||
const {user, ...rest} = json;
|
||||
return {
|
||||
__typename: "Reaction",
|
||||
user: user == null ? null : translateUser(user),
|
||||
...rest,
|
||||
};
|
||||
}
|
||||
|
||||
function translateRepositoryOwner(
|
||||
json: UserJSON | OrganizationJSON
|
||||
): RepositoryOwner {
|
||||
switch (json.__typename) {
|
||||
case "User":
|
||||
return translateUser(json);
|
||||
case "Organization":
|
||||
return translateOrganization(json);
|
||||
// istanbul ignore next: unreachable per Flow
|
||||
default:
|
||||
throw new Error((json.__typename: empty));
|
||||
}
|
||||
}
|
||||
|
||||
function translateActor(json: AuthorJSON): Actor {
|
||||
switch (json.__typename) {
|
||||
case "User":
|
||||
return translateUser(json);
|
||||
case "Organization":
|
||||
return translateOrganization(json);
|
||||
case "Bot":
|
||||
return translateBot(json);
|
||||
// istanbul ignore next: unreachable per Flow
|
||||
default:
|
||||
throw new Error((json.__typename: empty));
|
||||
}
|
||||
}
|
||||
|
||||
function translateUser(json: UserJSON): User {
|
||||
return {...json};
|
||||
}
|
||||
|
||||
function translateOrganization(json: OrganizationJSON): Organization {
|
||||
return {...json};
|
||||
}
|
||||
|
||||
function translateBot(json: BotJSON): Bot {
|
||||
return {...json};
|
||||
}
|
||||
|
||||
function translatePullRequest(json: PullJSON): PullRequest {
|
||||
const {author, comments, mergeCommit, reactions, reviews, ...rest} = json;
|
||||
return {
|
||||
__typename: "PullRequest",
|
||||
author: author == null ? null : translateActor(author),
|
||||
comments: comments.nodes.map(translateIssueComment),
|
||||
mergeCommit: mergeCommit == null ? null : translateCommit(mergeCommit),
|
||||
reactions: reactions.nodes.map(translateReaction),
|
||||
reviews: reviews.nodes.map(translatePullRequestReview),
|
||||
...rest,
|
||||
};
|
||||
}
|
||||
|
||||
function translatePullRequestReview(json: ReviewJSON): PullRequestReview {
|
||||
const {author, comments, ...rest} = json;
|
||||
return {
|
||||
__typename: "PullRequestReview",
|
||||
author: author == null ? null : translateActor(author),
|
||||
comments: comments.nodes.map(translatePullRequestReviewComment),
|
||||
...rest,
|
||||
};
|
||||
}
|
||||
|
||||
function translatePullRequestReviewComment(
|
||||
json: ReviewCommentJSON
|
||||
): PullRequestReviewComment {
|
||||
const {author, reactions, ...rest} = json;
|
||||
return {
|
||||
__typename: "PullRequestReviewComment",
|
||||
author: author == null ? null : translateActor(author),
|
||||
reactions: reactions.nodes.map(translateReaction),
|
||||
...rest,
|
||||
};
|
||||
}
|
||||
|
||||
const result = translateRepository(repositoryJson);
|
||||
return {result, warnings};
|
||||
}
|
|
@ -0,0 +1,150 @@
|
|||
// @flow
|
||||
|
||||
import {exampleData} from "./example/example";
|
||||
|
||||
import translateContinuations from "./translateContinuations";
|
||||
|
||||
describe("plugins/github/translateContinuations", () => {
  describe("translateContinuations", () => {
    // Snapshot check against the full example repository data.
    it("works on the example data", () => {
      const translated = translateContinuations(exampleData());
      expect(translated).toMatchSnapshot();
    });

    // A default branch ref that points at a tree should pass through
    // unchanged (plus typename) and be reported as a warning.
    it("raises a warning if the defaultBranchRef is not a commit", () => {
      const response = {
        repository: {
          defaultBranchRef: {
            id: "ref-id",
            target: {
              __typename: "Tree",
              id: "tree-id",
              oid: "123",
            },
          },
          id: "repo-id",
          issues: {
            nodes: [],
            pageInfo: {hasNextPage: false, endCursor: null},
          },
          name: "bar",
          owner: {
            __typename: "User",
            id: "user-id",
            login: "foo",
            url: "https://github.com/foo",
          },
          pulls: {
            nodes: [],
            pageInfo: {hasNextPage: false, endCursor: null},
          },
          url: "https://github.com/foo/bar",
        },
      };
      const translated = translateContinuations(response);

      const expectedTree = {__typename: "Tree", id: "tree-id", oid: "123"};
      const expectedRef = {
        __typename: "Ref",
        id: "ref-id",
        target: expectedTree,
      };
      const expectedWarnings = [
        {
          type: "NON_COMMIT_REF_TARGET",
          target: expectedTree,
        },
      ];
      expect(translated.result.defaultBranchRef).toEqual(expectedRef);
      expect(translated.warnings).toEqual(expectedWarnings);
    });

    // A merge commit whose only parent was never fetched should end up
    // with no parents, and the missing parent should be reported.
    it("raises a warning if there is an unknown commit", () => {
      const response = {
        repository: {
          defaultBranchRef: null,
          id: "repo-id",
          issues: {
            nodes: [],
            pageInfo: {hasNextPage: false, endCursor: null},
          },
          name: "bar",
          owner: {
            __typename: "User",
            id: "user-id",
            login: "foo",
            url: "https://github.com/foo",
          },
          pulls: {
            nodes: [
              {
                id: "pr-id",
                number: 1,
                author: {
                  __typename: "Bot",
                  id: "bot-id",
                  login: "baz",
                  url: "https://github.com/baz",
                },
                additions: 7,
                deletions: 9,
                comments: {
                  nodes: [],
                  pageInfo: {hasNextPage: false, endCursor: null},
                },
                reviews: {
                  nodes: [],
                  pageInfo: {hasNextPage: false, endCursor: null},
                },
                reactions: {
                  nodes: [],
                  pageInfo: {hasNextPage: false, endCursor: null},
                },
                mergeCommit: {
                  id: "commit-id",
                  author: {
                    date: "2001-02-03T04:05:06",
                    user: null,
                  },
                  message: "where are my parents?",
                  oid: "456",
                  parents: {
                    nodes: [{oid: "789"}],
                    pageInfo: {hasNextPage: false, endCursor: "cursor-parents"},
                  },
                  url: "https://github.com/foo/bar/commit/456",
                },
                title: "something",
                body: "whatever",
                url: "https://github.com/foo/bar/pull/1",
              },
            ],
            pageInfo: {hasNextPage: false, endCursor: "cursor-pulls"},
          },
          url: "https://github.com/foo/bar",
        },
      };
      const translated = translateContinuations(response);

      const [firstPull] = translated.result.pullRequests;
      if (firstPull == null) {
        throw new Error(String(firstPull));
      }

      const expectedMergeCommit = {
        __typename: "Commit",
        id: "commit-id",
        author: {
          date: "2001-02-03T04:05:06",
          user: null,
        },
        message: "where are my parents?",
        oid: "456",
        parents: [
          /* empty! */
        ],
        url: "https://github.com/foo/bar/commit/456",
      };
      const expectedWarnings = [
        {
          type: "UNKNOWN_PARENT_OID",
          child: "456",
          parent: "789",
        },
      ];
      expect(firstPull.mergeCommit).toEqual(expectedMergeCommit);
      expect(translated.warnings).toEqual(expectedWarnings);
    });
  });
});
|
Loading…
Reference in New Issue