Implement detection of paired authorship (#451)
This commit enables paired authorship on GitHub authored entities. If the entity has the string "Paired with" in the body, followed by a username reference, that entity will be recorded as having dual authorship, with the nominal author and the paired-with author being treated identically in the relational view and the graph. If there's a need to pair with more than one author, the "Paired with" signifier may be repeated. The regex matcher is forgiving of capitalizing the P or W, and an optional colon may be added immediately after the word "with". Note that the code assumes that every `TextContentEntity` is also an `AuthoredEntity`. If that changes, it will cause a type error and we'll need to refine the code somewhat. As implemented, it is impossible for the same user to author a post multiple time; if this is textually suggested (e.g. by a paired-with reference to the post's nominal author), the extra paired-with references are silently ignored. Also, having a paired-with reference suppresses the basic reference (although it is possible to have a post that is paired with someone, and additionally references them). Test plan: Tests have been updated, and the behavior of the parser is extensively tested. For an end-to-end demonstration, I've also added a unit test in the relational view that verifies that sourcecred/example-github#10 has two authors. You can also see that the graph snapshot has updated to include additional authorship edges (and that corresponding reference edges have disappeared). Closes #218
This commit is contained in:
parent
d74d760f43
commit
4afa542422
|
@ -406,6 +406,40 @@ Array [
|
|||
"dstIndex": 13,
|
||||
"srcIndex": 29,
|
||||
},
|
||||
Object {
|
||||
"address": Array [
|
||||
"sourcecred",
|
||||
"github",
|
||||
"AUTHORS",
|
||||
"2",
|
||||
"USERLIKE",
|
||||
"wchargin",
|
||||
"4",
|
||||
"ISSUE",
|
||||
"sourcecred",
|
||||
"example-github",
|
||||
"10",
|
||||
],
|
||||
"dstIndex": 17,
|
||||
"srcIndex": 30,
|
||||
},
|
||||
Object {
|
||||
"address": Array [
|
||||
"sourcecred",
|
||||
"github",
|
||||
"AUTHORS",
|
||||
"2",
|
||||
"USERLIKE",
|
||||
"wchargin",
|
||||
"4",
|
||||
"PULL",
|
||||
"sourcecred",
|
||||
"example-github",
|
||||
"9",
|
||||
],
|
||||
"dstIndex": 25,
|
||||
"srcIndex": 30,
|
||||
},
|
||||
Object {
|
||||
"address": Array [
|
||||
"sourcecred",
|
||||
|
@ -904,23 +938,6 @@ Array [
|
|||
"dstIndex": 1,
|
||||
"srcIndex": 24,
|
||||
},
|
||||
Object {
|
||||
"address": Array [
|
||||
"sourcecred",
|
||||
"github",
|
||||
"REFERENCES",
|
||||
"4",
|
||||
"ISSUE",
|
||||
"sourcecred",
|
||||
"example-github",
|
||||
"10",
|
||||
"2",
|
||||
"USERLIKE",
|
||||
"wchargin",
|
||||
],
|
||||
"dstIndex": 30,
|
||||
"srcIndex": 17,
|
||||
},
|
||||
Object {
|
||||
"address": Array [
|
||||
"sourcecred",
|
||||
|
@ -995,23 +1012,6 @@ Array [
|
|||
"dstIndex": 30,
|
||||
"srcIndex": 24,
|
||||
},
|
||||
Object {
|
||||
"address": Array [
|
||||
"sourcecred",
|
||||
"github",
|
||||
"REFERENCES",
|
||||
"4",
|
||||
"PULL",
|
||||
"sourcecred",
|
||||
"example-github",
|
||||
"9",
|
||||
"2",
|
||||
"USERLIKE",
|
||||
"wchargin",
|
||||
],
|
||||
"dstIndex": 30,
|
||||
"srcIndex": 25,
|
||||
},
|
||||
Object {
|
||||
"address": Array [
|
||||
"sourcecred",
|
||||
|
|
|
@ -258,18 +258,10 @@ Array [
|
|||
"from": "https://github.com/sourcecred/example-github/issues/10",
|
||||
"to": "https://github.com/sourcecred/example-github/issues/2",
|
||||
},
|
||||
Object {
|
||||
"from": "https://github.com/sourcecred/example-github/issues/10",
|
||||
"to": "https://github.com/wchargin",
|
||||
},
|
||||
Object {
|
||||
"from": "https://github.com/sourcecred/example-github/pull/5",
|
||||
"to": "https://github.com/wchargin",
|
||||
},
|
||||
Object {
|
||||
"from": "https://github.com/sourcecred/example-github/pull/9",
|
||||
"to": "https://github.com/wchargin",
|
||||
},
|
||||
Object {
|
||||
"from": "https://github.com/sourcecred/example-github/issues/2#issuecomment-373768703",
|
||||
"to": "https://github.com/sourcecred/example-github/issues/6",
|
||||
|
|
|
@ -1,19 +1,12 @@
|
|||
// @flow
|
||||
|
||||
function findAllMatches(re: RegExp, s: string): any[] {
|
||||
// modified from: https://stackoverflow.com/a/6323598
|
||||
let m;
|
||||
const matches = [];
|
||||
do {
|
||||
m = re.exec(s);
|
||||
if (m) {
|
||||
matches.push(m);
|
||||
}
|
||||
} while (m);
|
||||
return matches;
|
||||
}
|
||||
export type ParsedReference = {|
|
||||
// "@user" or "#123" or "https://github.com/owner/name/..."
|
||||
+ref: string,
|
||||
+refType: "BASIC" | "PAIRED_WITH",
|
||||
|};
|
||||
|
||||
export function parseReferences(body: string): string[] {
|
||||
export function parseReferences(body: string): ParsedReference[] {
|
||||
// Note to maintainer: If it becomes necessary to encode references in a
|
||||
// richer format, consider implementing the type signature described in
|
||||
// https://github.com/sourcecred/sourcecred/pull/130#pullrequestreview-113849998
|
||||
|
@ -24,17 +17,33 @@ export function parseReferences(body: string): string[] {
|
|||
];
|
||||
}
|
||||
|
||||
function findNumericReferences(body: string): string[] {
|
||||
return findAllMatches(/(?:\W|^)(#\d+)(?:\W|$)/g, body).map((x) => x[1]);
|
||||
function findNumericReferences(body: string): ParsedReference[] {
|
||||
return findAllMatches(/(?:\W|^)(#\d+)(?:\W|$)/g, body).map((x) => ({
|
||||
refType: "BASIC",
|
||||
ref: x[1],
|
||||
}));
|
||||
}
|
||||
|
||||
function findUsernameReferences(body: string): string[] {
|
||||
return findAllMatches(/(?:\W|^)(@[a-zA-Z0-9-]+)(?:\W|$)/g, body).map(
|
||||
(x) => x[1]
|
||||
);
|
||||
function findUsernameReferences(body: string): ParsedReference[] {
|
||||
const pairedWithRefs = findAllMatches(
|
||||
/(?:\W|^)(?:P|p)aired(?:-| )(?:w|W)ith:? (@[a-zA-Z0-9-]+)(?:\W|$)/g,
|
||||
body
|
||||
).map((x) => ({ref: x[1], refType: "PAIRED_WITH"}));
|
||||
const basicRefs = findAllMatches(
|
||||
/(?:\W|^)(@[a-zA-Z0-9-]+)(?:\W|$)/g,
|
||||
body
|
||||
).map((x) => ({ref: x[1], refType: "BASIC"}));
|
||||
for (const {ref} of pairedWithRefs) {
|
||||
const basicRefIndexToRemove = basicRefs.findIndex((x) => x.ref === ref);
|
||||
if (basicRefIndexToRemove === -1) {
|
||||
throw new Error(`Couldn't find BASIC ref for paired with ref: ${ref}`);
|
||||
}
|
||||
basicRefs.splice(basicRefIndexToRemove, 1);
|
||||
}
|
||||
return [...pairedWithRefs, ...basicRefs];
|
||||
}
|
||||
|
||||
function findGithubUrlReferences(body: string): string[] {
|
||||
function findGithubUrlReferences(body: string): ParsedReference[] {
|
||||
const githubNamePart = /(?:[a-zA-Z0-9_-]+)/.source;
|
||||
const urlRegex = new RegExp(
|
||||
"" +
|
||||
|
@ -54,5 +63,21 @@ function findGithubUrlReferences(body: string): string[] {
|
|||
/(?:[^\w/]|$)/.source,
|
||||
"gm"
|
||||
);
|
||||
return findAllMatches(urlRegex, body).map((match) => match[1]);
|
||||
return findAllMatches(urlRegex, body).map((match) => ({
|
||||
refType: "BASIC",
|
||||
ref: match[1],
|
||||
}));
|
||||
}
|
||||
|
||||
function findAllMatches(re: RegExp, s: string): any[] {
|
||||
// modified from: https://stackoverflow.com/a/6323598
|
||||
let m;
|
||||
const matches = [];
|
||||
do {
|
||||
m = re.exec(s);
|
||||
if (m) {
|
||||
matches.push(m);
|
||||
}
|
||||
} while (m);
|
||||
return matches;
|
||||
}
|
||||
|
|
|
@ -9,7 +9,11 @@ describe("parseReferences", () => {
|
|||
});
|
||||
|
||||
it("finds trivial numeric references", () => {
|
||||
expect(parseReferences("#1, #2, and #3")).toEqual(["#1", "#2", "#3"]);
|
||||
expect(parseReferences("#1, #2, and #3")).toEqual([
|
||||
{refType: "BASIC", ref: "#1"},
|
||||
{refType: "BASIC", ref: "#2"},
|
||||
{refType: "BASIC", ref: "#3"},
|
||||
]);
|
||||
});
|
||||
|
||||
it("finds numeric references in a multiline string", () => {
|
||||
|
@ -17,7 +21,11 @@ describe("parseReferences", () => {
|
|||
This is a multiline string.
|
||||
It refers to #1. Oh, and to #2 too.
|
||||
(#42 might be included too - who knows?)`;
|
||||
expect(parseReferences(example)).toEqual(["#1", "#2", "#42"]);
|
||||
expect(parseReferences(example)).toEqual([
|
||||
{refType: "BASIC", ref: "#1"},
|
||||
{refType: "BASIC", ref: "#2"},
|
||||
{refType: "BASIC", ref: "#42"},
|
||||
]);
|
||||
});
|
||||
|
||||
it("does not find bad references", () => {
|
||||
|
@ -64,14 +72,44 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222
|
|||
`;
|
||||
|
||||
const expected = [
|
||||
"https://github.com/sourcecred/example-github/issues/1",
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref: "https://github.com/sourcecred/example-github/issues/1",
|
||||
},
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref:
|
||||
"https://github.com/sourcecred/example-github/issues/1#issue-300934818",
|
||||
"https://github.com/sourcecred/example-github/pull/3",
|
||||
},
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref: "https://github.com/sourcecred/example-github/pull/3",
|
||||
},
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref:
|
||||
"https://github.com/sourcecred/example-github/pull/3#issue-171887741",
|
||||
},
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref:
|
||||
"https://github.com/sourcecred/example-github/issues/6#issuecomment-373768442",
|
||||
},
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref:
|
||||
"https://github.com/sourcecred/example-github/pull/5#pullrequestreview-100313899",
|
||||
},
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref:
|
||||
"https://github.com/sourcecred/example-github/pull/5#discussion_r171460198",
|
||||
},
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref:
|
||||
"https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222",
|
||||
},
|
||||
];
|
||||
|
||||
expect(parseReferences(example)).toEqual(expected);
|
||||
|
@ -93,27 +131,36 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222
|
|||
|
||||
it("allows but excludes leading and trailing punctuation", () => {
|
||||
const base = "https://github.com/sourcecred/sourcecred/pull/94";
|
||||
expect(parseReferences(`!${base}`)).toEqual([base]);
|
||||
expect(parseReferences(`${base}!`)).toEqual([base]);
|
||||
expect(parseReferences(`!${base}!`)).toEqual([base]);
|
||||
expect(parseReferences(`!${base}`)).toEqual([
|
||||
{refType: "BASIC", ref: base},
|
||||
]);
|
||||
expect(parseReferences(`${base}!`)).toEqual([
|
||||
{refType: "BASIC", ref: base},
|
||||
]);
|
||||
expect(parseReferences(`!${base}!`)).toEqual([
|
||||
{refType: "BASIC", ref: base},
|
||||
]);
|
||||
});
|
||||
|
||||
it("finds username references", () => {
|
||||
expect(parseReferences("hello to @wchargin from @decentralion!")).toEqual([
|
||||
"@wchargin",
|
||||
"@decentralion",
|
||||
{refType: "BASIC", ref: "@wchargin"},
|
||||
{refType: "BASIC", ref: "@decentralion"},
|
||||
]);
|
||||
});
|
||||
|
||||
it("finds usernames with hypens and numbers", () => {
|
||||
expect(
|
||||
parseReferences("@paddy-hack and @0x00 are valid usernames")
|
||||
).toEqual(["@paddy-hack", "@0x00"]);
|
||||
).toEqual([
|
||||
{refType: "BASIC", ref: "@paddy-hack"},
|
||||
{refType: "BASIC", ref: "@0x00"},
|
||||
]);
|
||||
});
|
||||
|
||||
it("finds username references by exact url", () => {
|
||||
expect(parseReferences("greetings https://github.com/wchargin")).toEqual([
|
||||
"https://github.com/wchargin",
|
||||
{refType: "BASIC", ref: "https://github.com/wchargin"},
|
||||
]);
|
||||
});
|
||||
|
||||
|
@ -123,9 +170,54 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222
|
|||
"@wchargin commented on #125, eg https://github.com/sourcecred/sourcecred/pull/125#pullrequestreview-113402856"
|
||||
)
|
||||
).toEqual([
|
||||
"#125",
|
||||
{refType: "BASIC", ref: "#125"},
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref:
|
||||
"https://github.com/sourcecred/sourcecred/pull/125#pullrequestreview-113402856",
|
||||
"@wchargin",
|
||||
},
|
||||
{refType: "BASIC", ref: "@wchargin"},
|
||||
]);
|
||||
});
|
||||
|
||||
it("finds paired with references", () => {
|
||||
// Note that there is *not* also a BASIC ref to @wchargin
|
||||
expect(parseReferences("paired with @wchargin")).toEqual([
|
||||
{refType: "PAIRED_WITH", ref: "@wchargin"},
|
||||
]);
|
||||
});
|
||||
|
||||
it("paired with allows flexible capitalization and hyphens or colons", () => {
|
||||
const examples = [
|
||||
"paired with @wchargin",
|
||||
"paired-with @wchargin",
|
||||
"paired with: @wchargin",
|
||||
"Paired with @wchargin",
|
||||
"Paired With @wchargin",
|
||||
"Paired With: @wchargin",
|
||||
"Paired-With: @wchargin",
|
||||
];
|
||||
for (const example of examples) {
|
||||
// Note that there is *not* also a BASIC ref to @wchargin
|
||||
expect(parseReferences(example)).toEqual([
|
||||
{refType: "PAIRED_WITH", ref: "@wchargin"},
|
||||
]);
|
||||
}
|
||||
});
|
||||
|
||||
it("can find a mixture of paired with and BASIC references", () => {
|
||||
expect(parseReferences("paired with @wchargin, thanks @wchargin")).toEqual([
|
||||
{refType: "PAIRED_WITH", ref: "@wchargin"},
|
||||
{refType: "BASIC", ref: "@wchargin"},
|
||||
]);
|
||||
});
|
||||
|
||||
it("multiple paired with refs are OK", () => {
|
||||
expect(
|
||||
parseReferences("paired with @wchargin and paired with @decentralion")
|
||||
).toEqual([
|
||||
{refType: "PAIRED_WITH", ref: "@wchargin"},
|
||||
{refType: "PAIRED_WITH", ref: "@decentralion"},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
|
@ -224,7 +224,7 @@ export class RelationalView {
|
|||
address,
|
||||
url: json.url,
|
||||
comments: json.comments.nodes.map((x) => this._addComment(address, x)),
|
||||
nominalAuthor: this._addNullableAuthor(json.author),
|
||||
authors: this._addNullableAuthor(json.author),
|
||||
body: json.body,
|
||||
title: json.title,
|
||||
};
|
||||
|
@ -251,7 +251,7 @@ export class RelationalView {
|
|||
url: json.url,
|
||||
comments: json.comments.nodes.map((x) => this._addComment(address, x)),
|
||||
reviews: json.reviews.nodes.map((x) => this._addReview(address, x)),
|
||||
nominalAuthor: this._addNullableAuthor(json.author),
|
||||
authors: this._addNullableAuthor(json.author),
|
||||
body: json.body,
|
||||
title: json.title,
|
||||
mergedAs,
|
||||
|
@ -274,7 +274,7 @@ export class RelationalView {
|
|||
state: json.state,
|
||||
comments: json.comments.nodes.map((x) => this._addComment(address, x)),
|
||||
body: json.body,
|
||||
nominalAuthor: this._addNullableAuthor(json.author),
|
||||
authors: this._addNullableAuthor(json.author),
|
||||
};
|
||||
this._reviews.set(N.toRaw(address), entry);
|
||||
return address;
|
||||
|
@ -302,16 +302,16 @@ export class RelationalView {
|
|||
const entry: CommentEntry = {
|
||||
address,
|
||||
url: json.url,
|
||||
nominalAuthor: this._addNullableAuthor(json.author),
|
||||
authors: this._addNullableAuthor(json.author),
|
||||
body: json.body,
|
||||
};
|
||||
this._comments.set(N.toRaw(address), entry);
|
||||
return address;
|
||||
}
|
||||
|
||||
_addNullableAuthor(json: Q.NullableAuthorJSON): ?UserlikeAddress {
|
||||
_addNullableAuthor(json: Q.NullableAuthorJSON): UserlikeAddress[] {
|
||||
if (json == null) {
|
||||
return null;
|
||||
return [];
|
||||
} else {
|
||||
const address: UserlikeAddress = {
|
||||
type: N.USERLIKE_TYPE,
|
||||
|
@ -319,7 +319,7 @@ export class RelationalView {
|
|||
};
|
||||
const entry: UserlikeEntry = {address, url: json.url};
|
||||
this._userlikes.set(N.toRaw(address), entry);
|
||||
return address;
|
||||
return [address];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -346,10 +346,36 @@ export class RelationalView {
|
|||
}
|
||||
for (const e of this.textContentEntities()) {
|
||||
const srcAddress = e.address();
|
||||
for (const ref of parseReferences(e.body())) {
|
||||
for (const {ref, refType} of parseReferences(e.body())) {
|
||||
const refAddress = refToAddress.get(ref);
|
||||
if (refAddress != null) {
|
||||
switch (refType) {
|
||||
case "BASIC":
|
||||
this._addReference(srcAddress, refAddress);
|
||||
break;
|
||||
case "PAIRED_WITH":
|
||||
if (refAddress.type !== N.USERLIKE_TYPE) {
|
||||
throw new Error(
|
||||
`Invariant error: @-ref did not refer to userlike: ${stringify(
|
||||
refAddress
|
||||
)}`
|
||||
);
|
||||
}
|
||||
const userlike = this.userlike(refAddress);
|
||||
if (userlike == null) {
|
||||
throw new Error(
|
||||
`Invariant error: nonexistent reference: ${stringify(
|
||||
refAddress
|
||||
)}`
|
||||
);
|
||||
}
|
||||
this._addExtraAuthor(e, userlike);
|
||||
break;
|
||||
default:
|
||||
// eslint-disable-next-line no-unused-expressions
|
||||
(refType: empty);
|
||||
throw new Error(`Unexpected refType: ${refType}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -372,6 +398,15 @@ export class RelationalView {
|
|||
}
|
||||
}
|
||||
|
||||
_addExtraAuthor(e: AuthoredEntity, extraAuthor: Userlike) {
|
||||
for (const existingAuthor of e.authors()) {
|
||||
if (existingAuthor.login() === extraAuthor.login()) {
|
||||
return; // user can't author the same thing twice
|
||||
}
|
||||
}
|
||||
e._entry.authors.push(extraAuthor.address());
|
||||
}
|
||||
|
||||
*_referencedBy(e: ReferentEntity): Iterator<TextContentEntity> {
|
||||
const refs = this._mapReferencedBy.get(N.toRaw(e.address()));
|
||||
if (refs == null) {
|
||||
|
@ -520,7 +555,7 @@ type IssueEntry = {|
|
|||
+body: string,
|
||||
+url: string,
|
||||
+comments: CommentAddress[],
|
||||
+nominalAuthor: ?UserlikeAddress,
|
||||
+authors: UserlikeAddress[],
|
||||
|};
|
||||
|
||||
export class Issue extends _Entity<IssueEntry> {
|
||||
|
@ -566,9 +601,9 @@ type PullEntry = {|
|
|||
+comments: CommentAddress[],
|
||||
+reviews: ReviewAddress[],
|
||||
+mergedAs: ?GitNode.CommitAddress,
|
||||
+nominalAuthor: ?UserlikeAddress,
|
||||
+additions: number,
|
||||
+deletions: number,
|
||||
+authors: UserlikeAddress[],
|
||||
|};
|
||||
|
||||
export class Pull extends _Entity<PullEntry> {
|
||||
|
@ -627,7 +662,7 @@ type ReviewEntry = {|
|
|||
+url: string,
|
||||
+comments: CommentAddress[],
|
||||
+state: Q.ReviewState,
|
||||
+nominalAuthor: ?UserlikeAddress,
|
||||
+authors: UserlikeAddress[],
|
||||
|};
|
||||
|
||||
export class Review extends _Entity<ReviewEntry> {
|
||||
|
@ -666,7 +701,7 @@ type CommentEntry = {|
|
|||
+address: CommentAddress,
|
||||
+body: string,
|
||||
+url: string,
|
||||
+nominalAuthor: ?UserlikeAddress,
|
||||
+authors: UserlikeAddress[],
|
||||
|};
|
||||
|
||||
export class Comment extends _Entity<CommentEntry> {
|
||||
|
@ -737,8 +772,7 @@ function* getAuthors(
|
|||
view: RelationalView,
|
||||
entry: IssueEntry | PullEntry | ReviewEntry | CommentEntry
|
||||
) {
|
||||
const address = entry.nominalAuthor;
|
||||
if (address != null) {
|
||||
for (const address of entry.authors) {
|
||||
const author = view.userlike(address);
|
||||
yield assertExists(author, address);
|
||||
}
|
||||
|
|
|
@ -173,6 +173,17 @@ describe("plugins/github/relationalView", () => {
|
|||
hasCorrectParent("review", review);
|
||||
});
|
||||
|
||||
it("paired with edges", () => {
|
||||
const issue10 = Array.from(view.issues()).find((x) => x.number() === "10");
|
||||
if (issue10 == null) {
|
||||
throw new Error(`Unable to find issue #10`);
|
||||
}
|
||||
expect(Array.from(issue10.authors()).map((x) => x.login())).toEqual([
|
||||
"decentralion",
|
||||
"wchargin",
|
||||
]);
|
||||
});
|
||||
|
||||
describe("reference detection", () => {
|
||||
// create url->url reference maps, for convenient snapshot readability
|
||||
const allReferences: Map<string, Set<string>> = new Map();
|
||||
|
|
Loading…
Reference in New Issue