Add logic for finding MentionsAuthorReferences (#806)

A `MentionsAuthorReference` is created when a post mentions a user, and
that user has authored at least one post in the same thread. Then there
is a `MentionsAuthorReference` from the post to the other posts by that
author.

For context, see the docstrings in `mentionsAuthorReference.js`, and
see #804.

Test plan:
Thorough unit tests have been added, which test the entire pipeline,
from ingesting the data via GitHub's graphql responses, through to
detecting the references. Edge cases such as self-reference and
multi-reference are tested.

Thanks to @wchargin for help writing this commit.
This commit is contained in:
Dandelion Mané 2018-09-11 20:27:31 -07:00 committed by GitHub
parent bf1e85d6f4
commit 91f76393e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 434 additions and 0 deletions

View File

@ -0,0 +1,110 @@
// @flow
import * as MapUtil from "../../../util/map";
import * as RV from "../relationalView";
import * as N from "../nodes";
// MentionsAuthorReferences are inferred edges between posts. If a post
// contains an @-mention of a user, and that user has authored any posts
// in the same thread, then we create MentionsAuthorReferences from the
// mentioning post to any posts in the same thread that were authored by
// the mentioned author.
//
// A concrete example: Suppose I write a post, and I @-mention you. Then
// my post will have MentionsAuthorReferences pointing to any posts that
// you write. The inference is that because I mentioned you by name, my
// post probably values your posts (either because I am referring to
// content you wrote in a previous post, or because I appreciate that
// you showed up to engage with me after being mentioned)
//
// See #804 for context:
// https://github.com/sourcecred/sourcecred/issues/804
export type MentionsAuthorReference = {|
// The post that generated an mentions author reference (via @-mention)
+src: N.TextContentAddress,
// The post authored by the person mentioned in the src
+dst: N.TextContentAddress,
// The person that was mentioned in src. Storing `who` is necessary
// because in the case of paired authorship, there might be multiple
// pointers from the same src to the same dst
+who: N.UserlikeAddress,
|};
export function* findMentionsAuthorReferences(
view: RV.RelationalView
): Iterator<MentionsAuthorReference> {
function* issuesAndPulls() {
yield* view.issues();
yield* view.pulls();
}
for (const post of issuesAndPulls()) {
const thread = createThread(post);
yield* referencesFromThread(thread);
}
}
// A post in a thread. This is either a top-level post (like an issue or
// pull request) or a child (like a comment).
type ThreadPost = {|
// Address of the post.
+address: N.TextContentAddress,
// List of all authors of the post. Order is undefined (this is a set
// encoded as a list).
+authors: $ReadOnlyArray<N.UserlikeAddress>,
|};
type Thread = {|
// Includes the root post.
// Ordered chronologically.
+posts: $ReadOnlyArray<ThreadPost>,
// Map from raw addresses of userlike kind to an array of all indices
// `i` such that `posts[i]` references the user (in arbitrary order).
// The domain is exactly the set of all users who are referenced in
// the thread at least once.
+userlikeToPostsReferencingThem: Map<N.RawAddress, $ReadOnlyArray<number>>,
|};
function createThread(root: RV.Issue | RV.Pull): Thread {
const posts = [];
const userlikeToPostsReferencingThem = new Map();
function addPost(x: RV.Issue | RV.Pull | RV.Comment) {
const address = x.address();
const authors = Array.from(x.authors()).map((x) => x.address());
const post = {address, authors};
posts.push(post);
for (const referenced of x.references()) {
if (referenced instanceof RV.Userlike) {
const referencedAddress = N.toRaw(referenced.address());
MapUtil.pushValue(
(((userlikeToPostsReferencingThem: Map<
N.RawAddress,
$ReadOnlyArray<number>
>): any): Map<N.RawAddress, number[]>),
referencedAddress,
posts.length - 1
);
}
}
}
addPost(root);
for (const comment of root.comments()) {
addPost(comment);
}
return {posts, userlikeToPostsReferencingThem};
}
function* referencesFromThread(
thread: Thread
): Iterator<MentionsAuthorReference> {
for (const post of thread.posts) {
for (const author of post.authors) {
for (const postIndex of thread.userlikeToPostsReferencingThem.get(
N.toRaw(author)
) || []) {
const thankingAddress = thread.posts[postIndex].address;
yield {src: thankingAddress, dst: post.address, who: author};
}
}
}
}

View File

@ -0,0 +1,324 @@
// @flow
import * as RV from "../relationalView";
import * as N from "../nodes";
import type {ConnectionJSON} from "../graphql";
import deepEqual from "lodash.isequal";
import {findMentionsAuthorReferences} from "./mentionsAuthorReference";
describe("plugins/github/heuristics/mentionsAuthorReference", () => {
function exampleRelationalView(): RV.RelationalView {
const view = new RV.RelationalView();
function connection<T>(nodes: $ReadOnlyArray<T>): ConnectionJSON<T> {
return {
nodes,
pageInfo: {
endCursor: "cursor:ignored",
hasNextPage: false,
},
};
}
const authors = {
steven: () => ({
__typename: "User",
id: "user:steven",
login: "steven",
url: "https://github.com/steven",
}),
garnet: () => ({
__typename: "User",
id: "user:garnet",
login: "garnet",
url: "https://github.com/garnet",
}),
amethyst: () => ({
__typename: "User",
id: "user:amethyst",
login: "amethyst",
url: "https://github.com/amethyst",
}),
pearl: () => ({
__typename: "User",
id: "user:pearl",
login: "pearl",
url: "https://github.com/pearl",
}),
holo: () => ({
__typename: "User",
id: "user:holo-pearl",
login: "holo-pearl",
url: "https://github.com/holo-pearl",
}),
};
const repoUrl = "https://github.com/my-owner/my-repo";
function issueUrl(number: number): string {
return `${repoUrl}/issues/${String(number)}`;
}
function issueCommentUrl(issueNumber: number, commentNumber: number) {
return `${issueUrl(issueNumber)}#issuecomment-${commentNumber}`;
}
const data = {
repository: {
id: "repo:my-repo",
issues: connection([
{
id: "issue:1",
url: issueUrl(1),
number: 1,
title: "calling into the void",
body: "hi @amethyst",
author: authors.steven(),
comments: connection([]),
},
{
id: "issue:2",
number: 2,
url: issueUrl(2),
title: "an issue with many types of references",
body: "it is me, @steven\n\nPaired with: @pearl",
author: authors.steven(),
comments: connection([
{
id: "comment:2_1",
url: issueCommentUrl(2, 1),
body: "parry parry thrust @pearl\nparry parry thrust @steven",
author: authors.holo(),
},
{
id: "comment:2_2",
url: issueCommentUrl(2, 2),
body: "@holo-pearl: stop!",
author: authors.steven(),
},
{
id: "comment:2_3",
url: issueCommentUrl(2, 3),
body: "@amethyst @garnet why aren't you helping",
author: authors.pearl(),
},
{
id: "comment:2_4",
url: issueCommentUrl(2, 4),
body: "@amethyst! come quickly, @amethyst!",
author: authors.garnet(),
},
{
id: "comment:2_5",
url: issueCommentUrl(2, 5),
body: "i am busy fighting @boomerang-blade guy",
author: authors.amethyst(),
},
]),
},
]),
pulls: connection([
{
id: "pull:3",
url: "https://github.com/my-owner/my-repo/pulls/3",
number: 3,
body: "Self referentially yours: @steven",
additions: 0,
deletions: 0,
comments: connection([]),
author: authors.steven(),
reviews: connection([]),
},
]),
url: "https://github.com/my-owner/my-repo",
name: "my-repo",
owner: {
__typename: "Organization",
id: "org:my-owner",
login: "my-owner",
url: "https://github.com/my-owner",
},
},
};
view.addData(data);
return view;
}
describe("findMentionsAuthorReferences", () => {
// Tracks all expected references, across test cases, so that we can verify
// at the end that no unexpected references are present.
let expectedMentionsAuthorReferences = [];
function expectReferences(name, references) {
expectedMentionsAuthorReferences.push(...references);
const relationalView = exampleRelationalView();
const allMentionsAuthorReferences = Array.from(
findMentionsAuthorReferences(relationalView)
);
it(`has references for ${name}`, () => {
for (const {src, dst, who} of references) {
// For clarity in debugging: Error early if our src/dst/who is
// not actually present in the RelationalView
expect(relationalView.entity(src)).toEqual(expect.anything());
expect(relationalView.entity(dst)).toEqual(expect.anything());
expect(relationalView.entity(who)).toEqual(expect.anything());
// Then verify that the reference is actually present
expect(allMentionsAuthorReferences).toContainEqual({src, dst, who});
}
});
}
const repo = {type: N.REPO_TYPE, owner: "my-owner", name: "my-repo"};
function issueAddress(number: number): N.IssueAddress {
return {type: N.ISSUE_TYPE, repo, number: String(number)};
}
function commentAddress(
issueNum: number,
commentNum: number
): N.CommentAddress {
const id = String(commentNum);
return {type: N.COMMENT_TYPE, parent: issueAddress(issueNum), id};
}
const users = Object.freeze({
pearl: {type: N.USERLIKE_TYPE, subtype: N.USER_SUBTYPE, login: "pearl"},
steven: {type: N.USERLIKE_TYPE, subtype: N.USER_SUBTYPE, login: "steven"},
holoPearl: {
type: N.USERLIKE_TYPE,
subtype: N.USER_SUBTYPE,
login: "holo-pearl",
},
amethyst: {
type: N.USERLIKE_TYPE,
subtype: N.USER_SUBTYPE,
login: "amethyst",
},
garnet: {type: N.USERLIKE_TYPE, subtype: N.USER_SUBTYPE, login: "garnet"},
});
/** The test cases below are organized so that for every
* post in the thread rooted at Issue #2, we expect all
* of the new MentionsAuthorReferences that were created
* as a result of that post (whether because that post is
* the src or the dst). At the end, we verify that every
* MentionsAuthorReference is accounted for. */
// If an author references themself, there will be a self-directed
// MentionsAuthorReference.
expectReferences("issue:2", [
{
src: issueAddress(2),
dst: issueAddress(2),
who: users.steven,
},
]);
// When there was a multi-author, one comment could produce multiple
// edges from the same src to the same dst. The `who` field
// disambiguates.
expectReferences("comment:2_1", [
{
src: commentAddress(2, 1),
dst: issueAddress(2),
who: users.pearl,
},
{
src: commentAddress(2, 1),
dst: issueAddress(2),
who: users.steven,
},
]);
expectReferences("comment:2_2", [
{
src: commentAddress(2, 2),
dst: commentAddress(2, 1),
who: users.holoPearl,
},
// Because Steven self-referenced, that post references all of Steven's
// other posts. (If people use this for gaming, we can reconsider, but
// it should be fairly obvious)
{src: issueAddress(2), dst: commentAddress(2, 2), who: users.steven},
{src: commentAddress(2, 1), dst: commentAddress(2, 2), who: users.steven},
]);
//
expectReferences("comment:2_3", [
{
src: commentAddress(2, 1),
dst: commentAddress(2, 3),
who: users.pearl,
},
]);
// This test case demonstrates that post 2_3, which mentioned @garnet before
// she participated in the thread, still creates a reference once @garnet
// appears.
expectReferences("comment:2_4", [
{
src: commentAddress(2, 3),
dst: commentAddress(2, 4),
who: users.garnet,
},
]);
// This test case demonstrates two interesting properties:
// - comment(2,3) generated two different MentionsAuthorReferences to
// different authors
// - comment(2,4) created two identical MentionsAuthorReferences
// pointing to this one
expectReferences("comment:2_5", [
{
src: commentAddress(2, 3),
dst: commentAddress(2, 5),
who: users.amethyst,
},
{
src: commentAddress(2, 4),
dst: commentAddress(2, 5),
who: users.amethyst,
},
{
src: commentAddress(2, 4),
dst: commentAddress(2, 5),
who: users.amethyst,
},
]);
// Make it very clear that the duplicate reference is, in fact, duplicated.
// (`expectReferences` would locally pass even if the reference were not
// duplicated. The suite would still fail because we compare lengths at the
// end, but having this test makes the failure much easier to see.)
it("the same reference can be created multiple times", () => {
const duplicateReference = {
src: commentAddress(2, 4),
dst: commentAddress(2, 5),
who: users.amethyst,
};
const relationalView = exampleRelationalView();
const allMentionsAuthorReferences = Array.from(
findMentionsAuthorReferences(relationalView)
);
const matching = allMentionsAuthorReferences.filter((x) =>
deepEqual(x, duplicateReference)
);
expect(matching).toHaveLength(2);
});
const pullAddress = {repo, type: N.PULL_TYPE, number: "3"};
// Verify that we check pulls too
expectReferences("pull:3", [
{
src: pullAddress,
dst: pullAddress,
who: users.steven,
},
]);
// Finally, we verify that every reference has been accounted for.
it("has no unexpected references", () => {
const actualMentionsAuthorReferences = Array.from(
findMentionsAuthorReferences(exampleRelationalView())
);
for (const actualRef of actualMentionsAuthorReferences) {
expect(expectedMentionsAuthorReferences).toContainEqual(actualRef);
}
expect(actualMentionsAuthorReferences).toHaveLength(
expectedMentionsAuthorReferences.length
);
});
});
});