Implement detection of paired authorship (#451)

This commit enables paired authorship on GitHub authored entities.
If the entity has the string "Paired with" in the body, followed by a
username reference, that entity will be recorded as having dual
authorship, with the nominal author and the paired-with author being
treated identically in the relational view and the graph.

If there's a need to pair with more than one author, the "Paired with"
signifier may be repeated. The regex matcher is forgiving of
capitalizing the P or W, and an optional colon may be added immediately
after the word "with".

Note that the code assumes that every `TextContentEntity` is also an
`AuthoredEntity`. If that changes, it will cause a type error and we'll
need to refine the code somewhat.

As implemented, it is impossible for the same user to author a post
multiple time; if this is textually suggested (e.g. by a paired-with
reference to the post's nominal author), the extra paired-with
references are silently ignored. Also, having a paired-with reference
suppresses the basic reference (although it is possible to have a post
that is paired with someone, and additionally references them).

Test plan:
Tests have been updated, and the behavior of the parser is extensively
tested. For an end-to-end demonstration, I've also added a unit test in
the relational view that verifies that sourcecred/example-github#10 has
two authors. You can also see that the graph snapshot has updated to
include additional authorship edges (and that corresponding reference
edges have disappeared).

Closes #218
This commit is contained in:
Dandelion Mané 2018-06-29 13:55:51 -07:00 committed by GitHub
parent d74d760f43
commit 4afa542422
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 252 additions and 98 deletions

View File

@ -406,6 +406,40 @@ Array [
"dstIndex": 13,
"srcIndex": 29,
},
Object {
"address": Array [
"sourcecred",
"github",
"AUTHORS",
"2",
"USERLIKE",
"wchargin",
"4",
"ISSUE",
"sourcecred",
"example-github",
"10",
],
"dstIndex": 17,
"srcIndex": 30,
},
Object {
"address": Array [
"sourcecred",
"github",
"AUTHORS",
"2",
"USERLIKE",
"wchargin",
"4",
"PULL",
"sourcecred",
"example-github",
"9",
],
"dstIndex": 25,
"srcIndex": 30,
},
Object {
"address": Array [
"sourcecred",
@ -904,23 +938,6 @@ Array [
"dstIndex": 1,
"srcIndex": 24,
},
Object {
"address": Array [
"sourcecred",
"github",
"REFERENCES",
"4",
"ISSUE",
"sourcecred",
"example-github",
"10",
"2",
"USERLIKE",
"wchargin",
],
"dstIndex": 30,
"srcIndex": 17,
},
Object {
"address": Array [
"sourcecred",
@ -995,23 +1012,6 @@ Array [
"dstIndex": 30,
"srcIndex": 24,
},
Object {
"address": Array [
"sourcecred",
"github",
"REFERENCES",
"4",
"PULL",
"sourcecred",
"example-github",
"9",
"2",
"USERLIKE",
"wchargin",
],
"dstIndex": 30,
"srcIndex": 25,
},
Object {
"address": Array [
"sourcecred",

View File

@ -258,18 +258,10 @@ Array [
"from": "https://github.com/sourcecred/example-github/issues/10",
"to": "https://github.com/sourcecred/example-github/issues/2",
},
Object {
"from": "https://github.com/sourcecred/example-github/issues/10",
"to": "https://github.com/wchargin",
},
Object {
"from": "https://github.com/sourcecred/example-github/pull/5",
"to": "https://github.com/wchargin",
},
Object {
"from": "https://github.com/sourcecred/example-github/pull/9",
"to": "https://github.com/wchargin",
},
Object {
"from": "https://github.com/sourcecred/example-github/issues/2#issuecomment-373768703",
"to": "https://github.com/sourcecred/example-github/issues/6",

View File

@ -1,19 +1,12 @@
// @flow
function findAllMatches(re: RegExp, s: string): any[] {
// modified from: https://stackoverflow.com/a/6323598
let m;
const matches = [];
do {
m = re.exec(s);
if (m) {
matches.push(m);
}
} while (m);
return matches;
}
export type ParsedReference = {|
// "@user" or "#123" or "https://github.com/owner/name/..."
+ref: string,
+refType: "BASIC" | "PAIRED_WITH",
|};
export function parseReferences(body: string): string[] {
export function parseReferences(body: string): ParsedReference[] {
// Note to maintainer: If it becomes necessary to encode references in a
// richer format, consider implementing the type signature described in
// https://github.com/sourcecred/sourcecred/pull/130#pullrequestreview-113849998
@ -24,17 +17,33 @@ export function parseReferences(body: string): string[] {
];
}
function findNumericReferences(body: string): string[] {
return findAllMatches(/(?:\W|^)(#\d+)(?:\W|$)/g, body).map((x) => x[1]);
function findNumericReferences(body: string): ParsedReference[] {
return findAllMatches(/(?:\W|^)(#\d+)(?:\W|$)/g, body).map((x) => ({
refType: "BASIC",
ref: x[1],
}));
}
function findUsernameReferences(body: string): string[] {
return findAllMatches(/(?:\W|^)(@[a-zA-Z0-9-]+)(?:\W|$)/g, body).map(
(x) => x[1]
);
function findUsernameReferences(body: string): ParsedReference[] {
const pairedWithRefs = findAllMatches(
/(?:\W|^)(?:P|p)aired(?:-| )(?:w|W)ith:? (@[a-zA-Z0-9-]+)(?:\W|$)/g,
body
).map((x) => ({ref: x[1], refType: "PAIRED_WITH"}));
const basicRefs = findAllMatches(
/(?:\W|^)(@[a-zA-Z0-9-]+)(?:\W|$)/g,
body
).map((x) => ({ref: x[1], refType: "BASIC"}));
for (const {ref} of pairedWithRefs) {
const basicRefIndexToRemove = basicRefs.findIndex((x) => x.ref === ref);
if (basicRefIndexToRemove === -1) {
throw new Error(`Couldn't find BASIC ref for paired with ref: ${ref}`);
}
basicRefs.splice(basicRefIndexToRemove, 1);
}
return [...pairedWithRefs, ...basicRefs];
}
function findGithubUrlReferences(body: string): string[] {
function findGithubUrlReferences(body: string): ParsedReference[] {
const githubNamePart = /(?:[a-zA-Z0-9_-]+)/.source;
const urlRegex = new RegExp(
"" +
@ -54,5 +63,21 @@ function findGithubUrlReferences(body: string): string[] {
/(?:[^\w/]|$)/.source,
"gm"
);
return findAllMatches(urlRegex, body).map((match) => match[1]);
return findAllMatches(urlRegex, body).map((match) => ({
refType: "BASIC",
ref: match[1],
}));
}
function findAllMatches(re: RegExp, s: string): any[] {
// modified from: https://stackoverflow.com/a/6323598
let m;
const matches = [];
do {
m = re.exec(s);
if (m) {
matches.push(m);
}
} while (m);
return matches;
}

View File

@ -9,7 +9,11 @@ describe("parseReferences", () => {
});
it("finds trivial numeric references", () => {
expect(parseReferences("#1, #2, and #3")).toEqual(["#1", "#2", "#3"]);
expect(parseReferences("#1, #2, and #3")).toEqual([
{refType: "BASIC", ref: "#1"},
{refType: "BASIC", ref: "#2"},
{refType: "BASIC", ref: "#3"},
]);
});
it("finds numeric references in a multiline string", () => {
@ -17,7 +21,11 @@ describe("parseReferences", () => {
This is a multiline string.
It refers to #1. Oh, and to #2 too.
(#42 might be included too - who knows?)`;
expect(parseReferences(example)).toEqual(["#1", "#2", "#42"]);
expect(parseReferences(example)).toEqual([
{refType: "BASIC", ref: "#1"},
{refType: "BASIC", ref: "#2"},
{refType: "BASIC", ref: "#42"},
]);
});
it("does not find bad references", () => {
@ -64,14 +72,44 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222
`;
const expected = [
"https://github.com/sourcecred/example-github/issues/1",
{
refType: "BASIC",
ref: "https://github.com/sourcecred/example-github/issues/1",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/issues/1#issue-300934818",
"https://github.com/sourcecred/example-github/pull/3",
},
{
refType: "BASIC",
ref: "https://github.com/sourcecred/example-github/pull/3",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/pull/3#issue-171887741",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/issues/6#issuecomment-373768442",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/pull/5#pullrequestreview-100313899",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/pull/5#discussion_r171460198",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222",
},
];
expect(parseReferences(example)).toEqual(expected);
@ -93,27 +131,36 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222
it("allows but excludes leading and trailing punctuation", () => {
const base = "https://github.com/sourcecred/sourcecred/pull/94";
expect(parseReferences(`!${base}`)).toEqual([base]);
expect(parseReferences(`${base}!`)).toEqual([base]);
expect(parseReferences(`!${base}!`)).toEqual([base]);
expect(parseReferences(`!${base}`)).toEqual([
{refType: "BASIC", ref: base},
]);
expect(parseReferences(`${base}!`)).toEqual([
{refType: "BASIC", ref: base},
]);
expect(parseReferences(`!${base}!`)).toEqual([
{refType: "BASIC", ref: base},
]);
});
it("finds username references", () => {
expect(parseReferences("hello to @wchargin from @decentralion!")).toEqual([
"@wchargin",
"@decentralion",
{refType: "BASIC", ref: "@wchargin"},
{refType: "BASIC", ref: "@decentralion"},
]);
});
it("finds usernames with hypens and numbers", () => {
expect(
parseReferences("@paddy-hack and @0x00 are valid usernames")
).toEqual(["@paddy-hack", "@0x00"]);
).toEqual([
{refType: "BASIC", ref: "@paddy-hack"},
{refType: "BASIC", ref: "@0x00"},
]);
});
it("finds username references by exact url", () => {
expect(parseReferences("greetings https://github.com/wchargin")).toEqual([
"https://github.com/wchargin",
{refType: "BASIC", ref: "https://github.com/wchargin"},
]);
});
@ -123,9 +170,54 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222
"@wchargin commented on #125, eg https://github.com/sourcecred/sourcecred/pull/125#pullrequestreview-113402856"
)
).toEqual([
"#125",
{refType: "BASIC", ref: "#125"},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/sourcecred/pull/125#pullrequestreview-113402856",
"@wchargin",
},
{refType: "BASIC", ref: "@wchargin"},
]);
});
it("finds paired with references", () => {
// Note that there is *not* also a BASIC ref to @wchargin
expect(parseReferences("paired with @wchargin")).toEqual([
{refType: "PAIRED_WITH", ref: "@wchargin"},
]);
});
it("paired with allows flexible capitalization and hyphens or colons", () => {
const examples = [
"paired with @wchargin",
"paired-with @wchargin",
"paired with: @wchargin",
"Paired with @wchargin",
"Paired With @wchargin",
"Paired With: @wchargin",
"Paired-With: @wchargin",
];
for (const example of examples) {
// Note that there is *not* also a BASIC ref to @wchargin
expect(parseReferences(example)).toEqual([
{refType: "PAIRED_WITH", ref: "@wchargin"},
]);
}
});
it("can find a mixture of paired with and BASIC references", () => {
expect(parseReferences("paired with @wchargin, thanks @wchargin")).toEqual([
{refType: "PAIRED_WITH", ref: "@wchargin"},
{refType: "BASIC", ref: "@wchargin"},
]);
});
it("multiple paired with refs are OK", () => {
expect(
parseReferences("paired with @wchargin and paired with @decentralion")
).toEqual([
{refType: "PAIRED_WITH", ref: "@wchargin"},
{refType: "PAIRED_WITH", ref: "@decentralion"},
]);
});
});

View File

@ -224,7 +224,7 @@ export class RelationalView {
address,
url: json.url,
comments: json.comments.nodes.map((x) => this._addComment(address, x)),
nominalAuthor: this._addNullableAuthor(json.author),
authors: this._addNullableAuthor(json.author),
body: json.body,
title: json.title,
};
@ -251,7 +251,7 @@ export class RelationalView {
url: json.url,
comments: json.comments.nodes.map((x) => this._addComment(address, x)),
reviews: json.reviews.nodes.map((x) => this._addReview(address, x)),
nominalAuthor: this._addNullableAuthor(json.author),
authors: this._addNullableAuthor(json.author),
body: json.body,
title: json.title,
mergedAs,
@ -274,7 +274,7 @@ export class RelationalView {
state: json.state,
comments: json.comments.nodes.map((x) => this._addComment(address, x)),
body: json.body,
nominalAuthor: this._addNullableAuthor(json.author),
authors: this._addNullableAuthor(json.author),
};
this._reviews.set(N.toRaw(address), entry);
return address;
@ -302,16 +302,16 @@ export class RelationalView {
const entry: CommentEntry = {
address,
url: json.url,
nominalAuthor: this._addNullableAuthor(json.author),
authors: this._addNullableAuthor(json.author),
body: json.body,
};
this._comments.set(N.toRaw(address), entry);
return address;
}
_addNullableAuthor(json: Q.NullableAuthorJSON): ?UserlikeAddress {
_addNullableAuthor(json: Q.NullableAuthorJSON): UserlikeAddress[] {
if (json == null) {
return null;
return [];
} else {
const address: UserlikeAddress = {
type: N.USERLIKE_TYPE,
@ -319,7 +319,7 @@ export class RelationalView {
};
const entry: UserlikeEntry = {address, url: json.url};
this._userlikes.set(N.toRaw(address), entry);
return address;
return [address];
}
}
@ -346,10 +346,36 @@ export class RelationalView {
}
for (const e of this.textContentEntities()) {
const srcAddress = e.address();
for (const ref of parseReferences(e.body())) {
for (const {ref, refType} of parseReferences(e.body())) {
const refAddress = refToAddress.get(ref);
if (refAddress != null) {
switch (refType) {
case "BASIC":
this._addReference(srcAddress, refAddress);
break;
case "PAIRED_WITH":
if (refAddress.type !== N.USERLIKE_TYPE) {
throw new Error(
`Invariant error: @-ref did not refer to userlike: ${stringify(
refAddress
)}`
);
}
const userlike = this.userlike(refAddress);
if (userlike == null) {
throw new Error(
`Invariant error: nonexistent reference: ${stringify(
refAddress
)}`
);
}
this._addExtraAuthor(e, userlike);
break;
default:
// eslint-disable-next-line no-unused-expressions
(refType: empty);
throw new Error(`Unexpected refType: ${refType}`);
}
}
}
}
@ -372,6 +398,15 @@ export class RelationalView {
}
}
_addExtraAuthor(e: AuthoredEntity, extraAuthor: Userlike) {
for (const existingAuthor of e.authors()) {
if (existingAuthor.login() === extraAuthor.login()) {
return; // user can't author the same thing twice
}
}
e._entry.authors.push(extraAuthor.address());
}
*_referencedBy(e: ReferentEntity): Iterator<TextContentEntity> {
const refs = this._mapReferencedBy.get(N.toRaw(e.address()));
if (refs == null) {
@ -520,7 +555,7 @@ type IssueEntry = {|
+body: string,
+url: string,
+comments: CommentAddress[],
+nominalAuthor: ?UserlikeAddress,
+authors: UserlikeAddress[],
|};
export class Issue extends _Entity<IssueEntry> {
@ -566,9 +601,9 @@ type PullEntry = {|
+comments: CommentAddress[],
+reviews: ReviewAddress[],
+mergedAs: ?GitNode.CommitAddress,
+nominalAuthor: ?UserlikeAddress,
+additions: number,
+deletions: number,
+authors: UserlikeAddress[],
|};
export class Pull extends _Entity<PullEntry> {
@ -627,7 +662,7 @@ type ReviewEntry = {|
+url: string,
+comments: CommentAddress[],
+state: Q.ReviewState,
+nominalAuthor: ?UserlikeAddress,
+authors: UserlikeAddress[],
|};
export class Review extends _Entity<ReviewEntry> {
@ -666,7 +701,7 @@ type CommentEntry = {|
+address: CommentAddress,
+body: string,
+url: string,
+nominalAuthor: ?UserlikeAddress,
+authors: UserlikeAddress[],
|};
export class Comment extends _Entity<CommentEntry> {
@ -737,8 +772,7 @@ function* getAuthors(
view: RelationalView,
entry: IssueEntry | PullEntry | ReviewEntry | CommentEntry
) {
const address = entry.nominalAuthor;
if (address != null) {
for (const address of entry.authors) {
const author = view.userlike(address);
yield assertExists(author, address);
}

View File

@ -173,6 +173,17 @@ describe("plugins/github/relationalView", () => {
hasCorrectParent("review", review);
});
it("paired with edges", () => {
const issue10 = Array.from(view.issues()).find((x) => x.number() === "10");
if (issue10 == null) {
throw new Error(`Unable to find issue #10`);
}
expect(Array.from(issue10.authors()).map((x) => x.login())).toEqual([
"decentralion",
"wchargin",
]);
});
describe("reference detection", () => {
// create url->url reference maps, for convenient snapshot readability
const allReferences: Map<string, Set<string>> = new Map();