Implement detection of paired authorship (#451)

This commit enables paired authorship on GitHub authored entities.
If the entity has the string "Paired with" in the body, followed by a
username reference, that entity will be recorded as having dual
authorship, with the nominal author and the paired-with author being
treated identically in the relational view and the graph.

If there's a need to pair with more than one author, the "Paired with"
signifier may be repeated. The regex matcher is forgiving of
capitalizing the P or W, and an optional colon may be added immediately
after the word "with".

Note that the code assumes that every `TextContentEntity` is also an
`AuthoredEntity`. If that changes, it will cause a type error and we'll
need to refine the code somewhat.

As implemented, it is impossible for the same user to author a post
multiple time; if this is textually suggested (e.g. by a paired-with
reference to the post's nominal author), the extra paired-with
references are silently ignored. Also, having a paired-with reference
suppresses the basic reference (although it is possible to have a post
that is paired with someone, and additionally references them).

Test plan:
Tests have been updated, and the behavior of the parser is extensively
tested. For an end-to-end demonstration, I've also added a unit test in
the relational view that verifies that sourcecred/example-github#10 has
two authors. You can also see that the graph snapshot has updated to
include additional authorship edges (and that corresponding reference
edges have disappeared).

Closes #218
This commit is contained in:
Dandelion Mané 2018-06-29 13:55:51 -07:00 committed by GitHub
parent d74d760f43
commit 4afa542422
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 252 additions and 98 deletions

View File

@ -406,6 +406,40 @@ Array [
"dstIndex": 13, "dstIndex": 13,
"srcIndex": 29, "srcIndex": 29,
}, },
Object {
"address": Array [
"sourcecred",
"github",
"AUTHORS",
"2",
"USERLIKE",
"wchargin",
"4",
"ISSUE",
"sourcecred",
"example-github",
"10",
],
"dstIndex": 17,
"srcIndex": 30,
},
Object {
"address": Array [
"sourcecred",
"github",
"AUTHORS",
"2",
"USERLIKE",
"wchargin",
"4",
"PULL",
"sourcecred",
"example-github",
"9",
],
"dstIndex": 25,
"srcIndex": 30,
},
Object { Object {
"address": Array [ "address": Array [
"sourcecred", "sourcecred",
@ -904,23 +938,6 @@ Array [
"dstIndex": 1, "dstIndex": 1,
"srcIndex": 24, "srcIndex": 24,
}, },
Object {
"address": Array [
"sourcecred",
"github",
"REFERENCES",
"4",
"ISSUE",
"sourcecred",
"example-github",
"10",
"2",
"USERLIKE",
"wchargin",
],
"dstIndex": 30,
"srcIndex": 17,
},
Object { Object {
"address": Array [ "address": Array [
"sourcecred", "sourcecred",
@ -995,23 +1012,6 @@ Array [
"dstIndex": 30, "dstIndex": 30,
"srcIndex": 24, "srcIndex": 24,
}, },
Object {
"address": Array [
"sourcecred",
"github",
"REFERENCES",
"4",
"PULL",
"sourcecred",
"example-github",
"9",
"2",
"USERLIKE",
"wchargin",
],
"dstIndex": 30,
"srcIndex": 25,
},
Object { Object {
"address": Array [ "address": Array [
"sourcecred", "sourcecred",

View File

@ -258,18 +258,10 @@ Array [
"from": "https://github.com/sourcecred/example-github/issues/10", "from": "https://github.com/sourcecred/example-github/issues/10",
"to": "https://github.com/sourcecred/example-github/issues/2", "to": "https://github.com/sourcecred/example-github/issues/2",
}, },
Object {
"from": "https://github.com/sourcecred/example-github/issues/10",
"to": "https://github.com/wchargin",
},
Object { Object {
"from": "https://github.com/sourcecred/example-github/pull/5", "from": "https://github.com/sourcecred/example-github/pull/5",
"to": "https://github.com/wchargin", "to": "https://github.com/wchargin",
}, },
Object {
"from": "https://github.com/sourcecred/example-github/pull/9",
"to": "https://github.com/wchargin",
},
Object { Object {
"from": "https://github.com/sourcecred/example-github/issues/2#issuecomment-373768703", "from": "https://github.com/sourcecred/example-github/issues/2#issuecomment-373768703",
"to": "https://github.com/sourcecred/example-github/issues/6", "to": "https://github.com/sourcecred/example-github/issues/6",

View File

@ -1,19 +1,12 @@
// @flow // @flow
function findAllMatches(re: RegExp, s: string): any[] { export type ParsedReference = {|
// modified from: https://stackoverflow.com/a/6323598 // "@user" or "#123" or "https://github.com/owner/name/..."
let m; +ref: string,
const matches = []; +refType: "BASIC" | "PAIRED_WITH",
do { |};
m = re.exec(s);
if (m) {
matches.push(m);
}
} while (m);
return matches;
}
export function parseReferences(body: string): string[] { export function parseReferences(body: string): ParsedReference[] {
// Note to maintainer: If it becomes necessary to encode references in a // Note to maintainer: If it becomes necessary to encode references in a
// richer format, consider implementing the type signature described in // richer format, consider implementing the type signature described in
// https://github.com/sourcecred/sourcecred/pull/130#pullrequestreview-113849998 // https://github.com/sourcecred/sourcecred/pull/130#pullrequestreview-113849998
@ -24,17 +17,33 @@ export function parseReferences(body: string): string[] {
]; ];
} }
function findNumericReferences(body: string): string[] { function findNumericReferences(body: string): ParsedReference[] {
return findAllMatches(/(?:\W|^)(#\d+)(?:\W|$)/g, body).map((x) => x[1]); return findAllMatches(/(?:\W|^)(#\d+)(?:\W|$)/g, body).map((x) => ({
refType: "BASIC",
ref: x[1],
}));
} }
function findUsernameReferences(body: string): string[] { function findUsernameReferences(body: string): ParsedReference[] {
return findAllMatches(/(?:\W|^)(@[a-zA-Z0-9-]+)(?:\W|$)/g, body).map( const pairedWithRefs = findAllMatches(
(x) => x[1] /(?:\W|^)(?:P|p)aired(?:-| )(?:w|W)ith:? (@[a-zA-Z0-9-]+)(?:\W|$)/g,
); body
).map((x) => ({ref: x[1], refType: "PAIRED_WITH"}));
const basicRefs = findAllMatches(
/(?:\W|^)(@[a-zA-Z0-9-]+)(?:\W|$)/g,
body
).map((x) => ({ref: x[1], refType: "BASIC"}));
for (const {ref} of pairedWithRefs) {
const basicRefIndexToRemove = basicRefs.findIndex((x) => x.ref === ref);
if (basicRefIndexToRemove === -1) {
throw new Error(`Couldn't find BASIC ref for paired with ref: ${ref}`);
}
basicRefs.splice(basicRefIndexToRemove, 1);
}
return [...pairedWithRefs, ...basicRefs];
} }
function findGithubUrlReferences(body: string): string[] { function findGithubUrlReferences(body: string): ParsedReference[] {
const githubNamePart = /(?:[a-zA-Z0-9_-]+)/.source; const githubNamePart = /(?:[a-zA-Z0-9_-]+)/.source;
const urlRegex = new RegExp( const urlRegex = new RegExp(
"" + "" +
@ -54,5 +63,21 @@ function findGithubUrlReferences(body: string): string[] {
/(?:[^\w/]|$)/.source, /(?:[^\w/]|$)/.source,
"gm" "gm"
); );
return findAllMatches(urlRegex, body).map((match) => match[1]); return findAllMatches(urlRegex, body).map((match) => ({
refType: "BASIC",
ref: match[1],
}));
}
function findAllMatches(re: RegExp, s: string): any[] {
// modified from: https://stackoverflow.com/a/6323598
let m;
const matches = [];
do {
m = re.exec(s);
if (m) {
matches.push(m);
}
} while (m);
return matches;
} }

View File

@ -9,7 +9,11 @@ describe("parseReferences", () => {
}); });
it("finds trivial numeric references", () => { it("finds trivial numeric references", () => {
expect(parseReferences("#1, #2, and #3")).toEqual(["#1", "#2", "#3"]); expect(parseReferences("#1, #2, and #3")).toEqual([
{refType: "BASIC", ref: "#1"},
{refType: "BASIC", ref: "#2"},
{refType: "BASIC", ref: "#3"},
]);
}); });
it("finds numeric references in a multiline string", () => { it("finds numeric references in a multiline string", () => {
@ -17,7 +21,11 @@ describe("parseReferences", () => {
This is a multiline string. This is a multiline string.
It refers to #1. Oh, and to #2 too. It refers to #1. Oh, and to #2 too.
(#42 might be included too - who knows?)`; (#42 might be included too - who knows?)`;
expect(parseReferences(example)).toEqual(["#1", "#2", "#42"]); expect(parseReferences(example)).toEqual([
{refType: "BASIC", ref: "#1"},
{refType: "BASIC", ref: "#2"},
{refType: "BASIC", ref: "#42"},
]);
}); });
it("does not find bad references", () => { it("does not find bad references", () => {
@ -64,14 +72,44 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222
`; `;
const expected = [ const expected = [
"https://github.com/sourcecred/example-github/issues/1", {
refType: "BASIC",
ref: "https://github.com/sourcecred/example-github/issues/1",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/issues/1#issue-300934818", "https://github.com/sourcecred/example-github/issues/1#issue-300934818",
"https://github.com/sourcecred/example-github/pull/3", },
{
refType: "BASIC",
ref: "https://github.com/sourcecred/example-github/pull/3",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/pull/3#issue-171887741", "https://github.com/sourcecred/example-github/pull/3#issue-171887741",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/issues/6#issuecomment-373768442", "https://github.com/sourcecred/example-github/issues/6#issuecomment-373768442",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/pull/5#pullrequestreview-100313899", "https://github.com/sourcecred/example-github/pull/5#pullrequestreview-100313899",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/pull/5#discussion_r171460198", "https://github.com/sourcecred/example-github/pull/5#discussion_r171460198",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222", "https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222",
},
]; ];
expect(parseReferences(example)).toEqual(expected); expect(parseReferences(example)).toEqual(expected);
@ -93,27 +131,36 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222
it("allows but excludes leading and trailing punctuation", () => { it("allows but excludes leading and trailing punctuation", () => {
const base = "https://github.com/sourcecred/sourcecred/pull/94"; const base = "https://github.com/sourcecred/sourcecred/pull/94";
expect(parseReferences(`!${base}`)).toEqual([base]); expect(parseReferences(`!${base}`)).toEqual([
expect(parseReferences(`${base}!`)).toEqual([base]); {refType: "BASIC", ref: base},
expect(parseReferences(`!${base}!`)).toEqual([base]); ]);
expect(parseReferences(`${base}!`)).toEqual([
{refType: "BASIC", ref: base},
]);
expect(parseReferences(`!${base}!`)).toEqual([
{refType: "BASIC", ref: base},
]);
}); });
it("finds username references", () => { it("finds username references", () => {
expect(parseReferences("hello to @wchargin from @decentralion!")).toEqual([ expect(parseReferences("hello to @wchargin from @decentralion!")).toEqual([
"@wchargin", {refType: "BASIC", ref: "@wchargin"},
"@decentralion", {refType: "BASIC", ref: "@decentralion"},
]); ]);
}); });
it("finds usernames with hypens and numbers", () => { it("finds usernames with hypens and numbers", () => {
expect( expect(
parseReferences("@paddy-hack and @0x00 are valid usernames") parseReferences("@paddy-hack and @0x00 are valid usernames")
).toEqual(["@paddy-hack", "@0x00"]); ).toEqual([
{refType: "BASIC", ref: "@paddy-hack"},
{refType: "BASIC", ref: "@0x00"},
]);
}); });
it("finds username references by exact url", () => { it("finds username references by exact url", () => {
expect(parseReferences("greetings https://github.com/wchargin")).toEqual([ expect(parseReferences("greetings https://github.com/wchargin")).toEqual([
"https://github.com/wchargin", {refType: "BASIC", ref: "https://github.com/wchargin"},
]); ]);
}); });
@ -123,9 +170,54 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222
"@wchargin commented on #125, eg https://github.com/sourcecred/sourcecred/pull/125#pullrequestreview-113402856" "@wchargin commented on #125, eg https://github.com/sourcecred/sourcecred/pull/125#pullrequestreview-113402856"
) )
).toEqual([ ).toEqual([
"#125", {refType: "BASIC", ref: "#125"},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/sourcecred/pull/125#pullrequestreview-113402856", "https://github.com/sourcecred/sourcecred/pull/125#pullrequestreview-113402856",
"@wchargin", },
{refType: "BASIC", ref: "@wchargin"},
]);
});
it("finds paired with references", () => {
// Note that there is *not* also a BASIC ref to @wchargin
expect(parseReferences("paired with @wchargin")).toEqual([
{refType: "PAIRED_WITH", ref: "@wchargin"},
]);
});
it("paired with allows flexible capitalization and hyphens or colons", () => {
const examples = [
"paired with @wchargin",
"paired-with @wchargin",
"paired with: @wchargin",
"Paired with @wchargin",
"Paired With @wchargin",
"Paired With: @wchargin",
"Paired-With: @wchargin",
];
for (const example of examples) {
// Note that there is *not* also a BASIC ref to @wchargin
expect(parseReferences(example)).toEqual([
{refType: "PAIRED_WITH", ref: "@wchargin"},
]);
}
});
it("can find a mixture of paired with and BASIC references", () => {
expect(parseReferences("paired with @wchargin, thanks @wchargin")).toEqual([
{refType: "PAIRED_WITH", ref: "@wchargin"},
{refType: "BASIC", ref: "@wchargin"},
]);
});
it("multiple paired with refs are OK", () => {
expect(
parseReferences("paired with @wchargin and paired with @decentralion")
).toEqual([
{refType: "PAIRED_WITH", ref: "@wchargin"},
{refType: "PAIRED_WITH", ref: "@decentralion"},
]); ]);
}); });
}); });

View File

@ -224,7 +224,7 @@ export class RelationalView {
address, address,
url: json.url, url: json.url,
comments: json.comments.nodes.map((x) => this._addComment(address, x)), comments: json.comments.nodes.map((x) => this._addComment(address, x)),
nominalAuthor: this._addNullableAuthor(json.author), authors: this._addNullableAuthor(json.author),
body: json.body, body: json.body,
title: json.title, title: json.title,
}; };
@ -251,7 +251,7 @@ export class RelationalView {
url: json.url, url: json.url,
comments: json.comments.nodes.map((x) => this._addComment(address, x)), comments: json.comments.nodes.map((x) => this._addComment(address, x)),
reviews: json.reviews.nodes.map((x) => this._addReview(address, x)), reviews: json.reviews.nodes.map((x) => this._addReview(address, x)),
nominalAuthor: this._addNullableAuthor(json.author), authors: this._addNullableAuthor(json.author),
body: json.body, body: json.body,
title: json.title, title: json.title,
mergedAs, mergedAs,
@ -274,7 +274,7 @@ export class RelationalView {
state: json.state, state: json.state,
comments: json.comments.nodes.map((x) => this._addComment(address, x)), comments: json.comments.nodes.map((x) => this._addComment(address, x)),
body: json.body, body: json.body,
nominalAuthor: this._addNullableAuthor(json.author), authors: this._addNullableAuthor(json.author),
}; };
this._reviews.set(N.toRaw(address), entry); this._reviews.set(N.toRaw(address), entry);
return address; return address;
@ -302,16 +302,16 @@ export class RelationalView {
const entry: CommentEntry = { const entry: CommentEntry = {
address, address,
url: json.url, url: json.url,
nominalAuthor: this._addNullableAuthor(json.author), authors: this._addNullableAuthor(json.author),
body: json.body, body: json.body,
}; };
this._comments.set(N.toRaw(address), entry); this._comments.set(N.toRaw(address), entry);
return address; return address;
} }
_addNullableAuthor(json: Q.NullableAuthorJSON): ?UserlikeAddress { _addNullableAuthor(json: Q.NullableAuthorJSON): UserlikeAddress[] {
if (json == null) { if (json == null) {
return null; return [];
} else { } else {
const address: UserlikeAddress = { const address: UserlikeAddress = {
type: N.USERLIKE_TYPE, type: N.USERLIKE_TYPE,
@ -319,7 +319,7 @@ export class RelationalView {
}; };
const entry: UserlikeEntry = {address, url: json.url}; const entry: UserlikeEntry = {address, url: json.url};
this._userlikes.set(N.toRaw(address), entry); this._userlikes.set(N.toRaw(address), entry);
return address; return [address];
} }
} }
@ -346,10 +346,36 @@ export class RelationalView {
} }
for (const e of this.textContentEntities()) { for (const e of this.textContentEntities()) {
const srcAddress = e.address(); const srcAddress = e.address();
for (const ref of parseReferences(e.body())) { for (const {ref, refType} of parseReferences(e.body())) {
const refAddress = refToAddress.get(ref); const refAddress = refToAddress.get(ref);
if (refAddress != null) { if (refAddress != null) {
switch (refType) {
case "BASIC":
this._addReference(srcAddress, refAddress); this._addReference(srcAddress, refAddress);
break;
case "PAIRED_WITH":
if (refAddress.type !== N.USERLIKE_TYPE) {
throw new Error(
`Invariant error: @-ref did not refer to userlike: ${stringify(
refAddress
)}`
);
}
const userlike = this.userlike(refAddress);
if (userlike == null) {
throw new Error(
`Invariant error: nonexistent reference: ${stringify(
refAddress
)}`
);
}
this._addExtraAuthor(e, userlike);
break;
default:
// eslint-disable-next-line no-unused-expressions
(refType: empty);
throw new Error(`Unexpected refType: ${refType}`);
}
} }
} }
} }
@ -372,6 +398,15 @@ export class RelationalView {
} }
} }
_addExtraAuthor(e: AuthoredEntity, extraAuthor: Userlike) {
for (const existingAuthor of e.authors()) {
if (existingAuthor.login() === extraAuthor.login()) {
return; // user can't author the same thing twice
}
}
e._entry.authors.push(extraAuthor.address());
}
*_referencedBy(e: ReferentEntity): Iterator<TextContentEntity> { *_referencedBy(e: ReferentEntity): Iterator<TextContentEntity> {
const refs = this._mapReferencedBy.get(N.toRaw(e.address())); const refs = this._mapReferencedBy.get(N.toRaw(e.address()));
if (refs == null) { if (refs == null) {
@ -520,7 +555,7 @@ type IssueEntry = {|
+body: string, +body: string,
+url: string, +url: string,
+comments: CommentAddress[], +comments: CommentAddress[],
+nominalAuthor: ?UserlikeAddress, +authors: UserlikeAddress[],
|}; |};
export class Issue extends _Entity<IssueEntry> { export class Issue extends _Entity<IssueEntry> {
@ -566,9 +601,9 @@ type PullEntry = {|
+comments: CommentAddress[], +comments: CommentAddress[],
+reviews: ReviewAddress[], +reviews: ReviewAddress[],
+mergedAs: ?GitNode.CommitAddress, +mergedAs: ?GitNode.CommitAddress,
+nominalAuthor: ?UserlikeAddress,
+additions: number, +additions: number,
+deletions: number, +deletions: number,
+authors: UserlikeAddress[],
|}; |};
export class Pull extends _Entity<PullEntry> { export class Pull extends _Entity<PullEntry> {
@ -627,7 +662,7 @@ type ReviewEntry = {|
+url: string, +url: string,
+comments: CommentAddress[], +comments: CommentAddress[],
+state: Q.ReviewState, +state: Q.ReviewState,
+nominalAuthor: ?UserlikeAddress, +authors: UserlikeAddress[],
|}; |};
export class Review extends _Entity<ReviewEntry> { export class Review extends _Entity<ReviewEntry> {
@ -666,7 +701,7 @@ type CommentEntry = {|
+address: CommentAddress, +address: CommentAddress,
+body: string, +body: string,
+url: string, +url: string,
+nominalAuthor: ?UserlikeAddress, +authors: UserlikeAddress[],
|}; |};
export class Comment extends _Entity<CommentEntry> { export class Comment extends _Entity<CommentEntry> {
@ -737,8 +772,7 @@ function* getAuthors(
view: RelationalView, view: RelationalView,
entry: IssueEntry | PullEntry | ReviewEntry | CommentEntry entry: IssueEntry | PullEntry | ReviewEntry | CommentEntry
) { ) {
const address = entry.nominalAuthor; for (const address of entry.authors) {
if (address != null) {
const author = view.userlike(address); const author = view.userlike(address);
yield assertExists(author, address); yield assertExists(author, address);
} }

View File

@ -173,6 +173,17 @@ describe("plugins/github/relationalView", () => {
hasCorrectParent("review", review); hasCorrectParent("review", review);
}); });
it("paired with edges", () => {
const issue10 = Array.from(view.issues()).find((x) => x.number() === "10");
if (issue10 == null) {
throw new Error(`Unable to find issue #10`);
}
expect(Array.from(issue10.authors()).map((x) => x.login())).toEqual([
"decentralion",
"wchargin",
]);
});
describe("reference detection", () => { describe("reference detection", () => {
// create url->url reference maps, for convenient snapshot readability // create url->url reference maps, for convenient snapshot readability
const allReferences: Map<string, Set<string>> = new Map(); const allReferences: Map<string, Set<string>> = new Map();