From 4afa5424226d9df03a68872fab2e9148db71aa4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Fri, 29 Jun 2018 13:55:51 -0700 Subject: [PATCH] Implement detection of paired authorship (#451) This commit enables paired authorship on GitHub authored entities. If the entity has the string "Paired with" in the body, followed by a username reference, that entity will be recorded as having dual authorship, with the nominal author and the paired-with author being treated identically in the relational view and the graph. If there's a need to pair with more than one author, the "Paired with" signifier may be repeated. The regex matcher is forgiving of capitalizing the P or W, and an optional colon may be added immediately after the word "with". Note that the code assumes that every `TextContentEntity` is also an `AuthoredEntity`. If that changes, it will cause a type error and we'll need to refine the code somewhat. As implemented, it is impossible for the same user to author a post multiple time; if this is textually suggested (e.g. by a paired-with reference to the post's nominal author), the extra paired-with references are silently ignored. Also, having a paired-with reference suppresses the basic reference (although it is possible to have a post that is paired with someone, and additionally references them). Test plan: Tests have been updated, and the behavior of the parser is extensively tested. For an end-to-end demonstration, I've also added a unit test in the relational view that verifies that sourcecred/example-github#10 has two authors. You can also see that the graph snapshot has updated to include additional authorship edges (and that corresponding reference edges have disappeared). Closes #218 --- .../__snapshots__/createGraph.test.js.snap | 68 ++++----- .../__snapshots__/relationalView.test.js.snap | 8 -- src/v3/plugins/github/parseReferences.js | 67 ++++++--- src/v3/plugins/github/parseReferences.test.js | 132 +++++++++++++++--- src/v3/plugins/github/relationalView.js | 64 +++++++-- src/v3/plugins/github/relationalView.test.js | 11 ++ 6 files changed, 252 insertions(+), 98 deletions(-) diff --git a/src/v3/plugins/github/__snapshots__/createGraph.test.js.snap b/src/v3/plugins/github/__snapshots__/createGraph.test.js.snap index 112f2b4..baa66fb 100644 --- a/src/v3/plugins/github/__snapshots__/createGraph.test.js.snap +++ b/src/v3/plugins/github/__snapshots__/createGraph.test.js.snap @@ -406,6 +406,40 @@ Array [ "dstIndex": 13, "srcIndex": 29, }, + Object { + "address": Array [ + "sourcecred", + "github", + "AUTHORS", + "2", + "USERLIKE", + "wchargin", + "4", + "ISSUE", + "sourcecred", + "example-github", + "10", + ], + "dstIndex": 17, + "srcIndex": 30, + }, + Object { + "address": Array [ + "sourcecred", + "github", + "AUTHORS", + "2", + "USERLIKE", + "wchargin", + "4", + "PULL", + "sourcecred", + "example-github", + "9", + ], + "dstIndex": 25, + "srcIndex": 30, + }, Object { "address": Array [ "sourcecred", @@ -904,23 +938,6 @@ Array [ "dstIndex": 1, "srcIndex": 24, }, - Object { - "address": Array [ - "sourcecred", - "github", - "REFERENCES", - "4", - "ISSUE", - "sourcecred", - "example-github", - "10", - "2", - "USERLIKE", - "wchargin", - ], - "dstIndex": 30, - "srcIndex": 17, - }, Object { "address": Array [ "sourcecred", @@ -995,23 +1012,6 @@ Array [ "dstIndex": 30, "srcIndex": 24, }, - Object { - "address": Array [ - "sourcecred", - "github", - "REFERENCES", - "4", - "PULL", - "sourcecred", - "example-github", - "9", - "2", - "USERLIKE", - "wchargin", - ], - "dstIndex": 30, - "srcIndex": 25, - }, Object { "address": Array [ "sourcecred", diff --git a/src/v3/plugins/github/__snapshots__/relationalView.test.js.snap b/src/v3/plugins/github/__snapshots__/relationalView.test.js.snap index 17d05e2..719f789 100644 --- a/src/v3/plugins/github/__snapshots__/relationalView.test.js.snap +++ b/src/v3/plugins/github/__snapshots__/relationalView.test.js.snap @@ -258,18 +258,10 @@ Array [ "from": "https://github.com/sourcecred/example-github/issues/10", "to": "https://github.com/sourcecred/example-github/issues/2", }, - Object { - "from": "https://github.com/sourcecred/example-github/issues/10", - "to": "https://github.com/wchargin", - }, Object { "from": "https://github.com/sourcecred/example-github/pull/5", "to": "https://github.com/wchargin", }, - Object { - "from": "https://github.com/sourcecred/example-github/pull/9", - "to": "https://github.com/wchargin", - }, Object { "from": "https://github.com/sourcecred/example-github/issues/2#issuecomment-373768703", "to": "https://github.com/sourcecred/example-github/issues/6", diff --git a/src/v3/plugins/github/parseReferences.js b/src/v3/plugins/github/parseReferences.js index 49e50e8..70db285 100644 --- a/src/v3/plugins/github/parseReferences.js +++ b/src/v3/plugins/github/parseReferences.js @@ -1,19 +1,12 @@ // @flow -function findAllMatches(re: RegExp, s: string): any[] { - // modified from: https://stackoverflow.com/a/6323598 - let m; - const matches = []; - do { - m = re.exec(s); - if (m) { - matches.push(m); - } - } while (m); - return matches; -} +export type ParsedReference = {| + // "@user" or "#123" or "https://github.com/owner/name/..." + +ref: string, + +refType: "BASIC" | "PAIRED_WITH", +|}; -export function parseReferences(body: string): string[] { +export function parseReferences(body: string): ParsedReference[] { // Note to maintainer: If it becomes necessary to encode references in a // richer format, consider implementing the type signature described in // https://github.com/sourcecred/sourcecred/pull/130#pullrequestreview-113849998 @@ -24,17 +17,33 @@ export function parseReferences(body: string): string[] { ]; } -function findNumericReferences(body: string): string[] { - return findAllMatches(/(?:\W|^)(#\d+)(?:\W|$)/g, body).map((x) => x[1]); +function findNumericReferences(body: string): ParsedReference[] { + return findAllMatches(/(?:\W|^)(#\d+)(?:\W|$)/g, body).map((x) => ({ + refType: "BASIC", + ref: x[1], + })); } -function findUsernameReferences(body: string): string[] { - return findAllMatches(/(?:\W|^)(@[a-zA-Z0-9-]+)(?:\W|$)/g, body).map( - (x) => x[1] - ); +function findUsernameReferences(body: string): ParsedReference[] { + const pairedWithRefs = findAllMatches( + /(?:\W|^)(?:P|p)aired(?:-| )(?:w|W)ith:? (@[a-zA-Z0-9-]+)(?:\W|$)/g, + body + ).map((x) => ({ref: x[1], refType: "PAIRED_WITH"})); + const basicRefs = findAllMatches( + /(?:\W|^)(@[a-zA-Z0-9-]+)(?:\W|$)/g, + body + ).map((x) => ({ref: x[1], refType: "BASIC"})); + for (const {ref} of pairedWithRefs) { + const basicRefIndexToRemove = basicRefs.findIndex((x) => x.ref === ref); + if (basicRefIndexToRemove === -1) { + throw new Error(`Couldn't find BASIC ref for paired with ref: ${ref}`); + } + basicRefs.splice(basicRefIndexToRemove, 1); + } + return [...pairedWithRefs, ...basicRefs]; } -function findGithubUrlReferences(body: string): string[] { +function findGithubUrlReferences(body: string): ParsedReference[] { const githubNamePart = /(?:[a-zA-Z0-9_-]+)/.source; const urlRegex = new RegExp( "" + @@ -54,5 +63,21 @@ function findGithubUrlReferences(body: string): string[] { /(?:[^\w/]|$)/.source, "gm" ); - return findAllMatches(urlRegex, body).map((match) => match[1]); + return findAllMatches(urlRegex, body).map((match) => ({ + refType: "BASIC", + ref: match[1], + })); +} + +function findAllMatches(re: RegExp, s: string): any[] { + // modified from: https://stackoverflow.com/a/6323598 + let m; + const matches = []; + do { + m = re.exec(s); + if (m) { + matches.push(m); + } + } while (m); + return matches; } diff --git a/src/v3/plugins/github/parseReferences.test.js b/src/v3/plugins/github/parseReferences.test.js index a4304ac..7d97121 100644 --- a/src/v3/plugins/github/parseReferences.test.js +++ b/src/v3/plugins/github/parseReferences.test.js @@ -9,7 +9,11 @@ describe("parseReferences", () => { }); it("finds trivial numeric references", () => { - expect(parseReferences("#1, #2, and #3")).toEqual(["#1", "#2", "#3"]); + expect(parseReferences("#1, #2, and #3")).toEqual([ + {refType: "BASIC", ref: "#1"}, + {refType: "BASIC", ref: "#2"}, + {refType: "BASIC", ref: "#3"}, + ]); }); it("finds numeric references in a multiline string", () => { @@ -17,7 +21,11 @@ describe("parseReferences", () => { This is a multiline string. It refers to #1. Oh, and to #2 too. (#42 might be included too - who knows?)`; - expect(parseReferences(example)).toEqual(["#1", "#2", "#42"]); + expect(parseReferences(example)).toEqual([ + {refType: "BASIC", ref: "#1"}, + {refType: "BASIC", ref: "#2"}, + {refType: "BASIC", ref: "#42"}, + ]); }); it("does not find bad references", () => { @@ -64,14 +72,44 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222 `; const expected = [ - "https://github.com/sourcecred/example-github/issues/1", - "https://github.com/sourcecred/example-github/issues/1#issue-300934818", - "https://github.com/sourcecred/example-github/pull/3", - "https://github.com/sourcecred/example-github/pull/3#issue-171887741", - "https://github.com/sourcecred/example-github/issues/6#issuecomment-373768442", - "https://github.com/sourcecred/example-github/pull/5#pullrequestreview-100313899", - "https://github.com/sourcecred/example-github/pull/5#discussion_r171460198", - "https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222", + { + refType: "BASIC", + ref: "https://github.com/sourcecred/example-github/issues/1", + }, + { + refType: "BASIC", + ref: + "https://github.com/sourcecred/example-github/issues/1#issue-300934818", + }, + { + refType: "BASIC", + ref: "https://github.com/sourcecred/example-github/pull/3", + }, + { + refType: "BASIC", + ref: + "https://github.com/sourcecred/example-github/pull/3#issue-171887741", + }, + { + refType: "BASIC", + ref: + "https://github.com/sourcecred/example-github/issues/6#issuecomment-373768442", + }, + { + refType: "BASIC", + ref: + "https://github.com/sourcecred/example-github/pull/5#pullrequestreview-100313899", + }, + { + refType: "BASIC", + ref: + "https://github.com/sourcecred/example-github/pull/5#discussion_r171460198", + }, + { + refType: "BASIC", + ref: + "https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222", + }, ]; expect(parseReferences(example)).toEqual(expected); @@ -93,27 +131,36 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222 it("allows but excludes leading and trailing punctuation", () => { const base = "https://github.com/sourcecred/sourcecred/pull/94"; - expect(parseReferences(`!${base}`)).toEqual([base]); - expect(parseReferences(`${base}!`)).toEqual([base]); - expect(parseReferences(`!${base}!`)).toEqual([base]); + expect(parseReferences(`!${base}`)).toEqual([ + {refType: "BASIC", ref: base}, + ]); + expect(parseReferences(`${base}!`)).toEqual([ + {refType: "BASIC", ref: base}, + ]); + expect(parseReferences(`!${base}!`)).toEqual([ + {refType: "BASIC", ref: base}, + ]); }); it("finds username references", () => { expect(parseReferences("hello to @wchargin from @decentralion!")).toEqual([ - "@wchargin", - "@decentralion", + {refType: "BASIC", ref: "@wchargin"}, + {refType: "BASIC", ref: "@decentralion"}, ]); }); it("finds usernames with hypens and numbers", () => { expect( parseReferences("@paddy-hack and @0x00 are valid usernames") - ).toEqual(["@paddy-hack", "@0x00"]); + ).toEqual([ + {refType: "BASIC", ref: "@paddy-hack"}, + {refType: "BASIC", ref: "@0x00"}, + ]); }); it("finds username references by exact url", () => { expect(parseReferences("greetings https://github.com/wchargin")).toEqual([ - "https://github.com/wchargin", + {refType: "BASIC", ref: "https://github.com/wchargin"}, ]); }); @@ -123,9 +170,54 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222 "@wchargin commented on #125, eg https://github.com/sourcecred/sourcecred/pull/125#pullrequestreview-113402856" ) ).toEqual([ - "#125", - "https://github.com/sourcecred/sourcecred/pull/125#pullrequestreview-113402856", - "@wchargin", + {refType: "BASIC", ref: "#125"}, + { + refType: "BASIC", + ref: + "https://github.com/sourcecred/sourcecred/pull/125#pullrequestreview-113402856", + }, + {refType: "BASIC", ref: "@wchargin"}, + ]); + }); + + it("finds paired with references", () => { + // Note that there is *not* also a BASIC ref to @wchargin + expect(parseReferences("paired with @wchargin")).toEqual([ + {refType: "PAIRED_WITH", ref: "@wchargin"}, + ]); + }); + + it("paired with allows flexible capitalization and hyphens or colons", () => { + const examples = [ + "paired with @wchargin", + "paired-with @wchargin", + "paired with: @wchargin", + "Paired with @wchargin", + "Paired With @wchargin", + "Paired With: @wchargin", + "Paired-With: @wchargin", + ]; + for (const example of examples) { + // Note that there is *not* also a BASIC ref to @wchargin + expect(parseReferences(example)).toEqual([ + {refType: "PAIRED_WITH", ref: "@wchargin"}, + ]); + } + }); + + it("can find a mixture of paired with and BASIC references", () => { + expect(parseReferences("paired with @wchargin, thanks @wchargin")).toEqual([ + {refType: "PAIRED_WITH", ref: "@wchargin"}, + {refType: "BASIC", ref: "@wchargin"}, + ]); + }); + + it("multiple paired with refs are OK", () => { + expect( + parseReferences("paired with @wchargin and paired with @decentralion") + ).toEqual([ + {refType: "PAIRED_WITH", ref: "@wchargin"}, + {refType: "PAIRED_WITH", ref: "@decentralion"}, ]); }); }); diff --git a/src/v3/plugins/github/relationalView.js b/src/v3/plugins/github/relationalView.js index 34cab00..4598f2a 100644 --- a/src/v3/plugins/github/relationalView.js +++ b/src/v3/plugins/github/relationalView.js @@ -224,7 +224,7 @@ export class RelationalView { address, url: json.url, comments: json.comments.nodes.map((x) => this._addComment(address, x)), - nominalAuthor: this._addNullableAuthor(json.author), + authors: this._addNullableAuthor(json.author), body: json.body, title: json.title, }; @@ -251,7 +251,7 @@ export class RelationalView { url: json.url, comments: json.comments.nodes.map((x) => this._addComment(address, x)), reviews: json.reviews.nodes.map((x) => this._addReview(address, x)), - nominalAuthor: this._addNullableAuthor(json.author), + authors: this._addNullableAuthor(json.author), body: json.body, title: json.title, mergedAs, @@ -274,7 +274,7 @@ export class RelationalView { state: json.state, comments: json.comments.nodes.map((x) => this._addComment(address, x)), body: json.body, - nominalAuthor: this._addNullableAuthor(json.author), + authors: this._addNullableAuthor(json.author), }; this._reviews.set(N.toRaw(address), entry); return address; @@ -302,16 +302,16 @@ export class RelationalView { const entry: CommentEntry = { address, url: json.url, - nominalAuthor: this._addNullableAuthor(json.author), + authors: this._addNullableAuthor(json.author), body: json.body, }; this._comments.set(N.toRaw(address), entry); return address; } - _addNullableAuthor(json: Q.NullableAuthorJSON): ?UserlikeAddress { + _addNullableAuthor(json: Q.NullableAuthorJSON): UserlikeAddress[] { if (json == null) { - return null; + return []; } else { const address: UserlikeAddress = { type: N.USERLIKE_TYPE, @@ -319,7 +319,7 @@ export class RelationalView { }; const entry: UserlikeEntry = {address, url: json.url}; this._userlikes.set(N.toRaw(address), entry); - return address; + return [address]; } } @@ -346,10 +346,36 @@ export class RelationalView { } for (const e of this.textContentEntities()) { const srcAddress = e.address(); - for (const ref of parseReferences(e.body())) { + for (const {ref, refType} of parseReferences(e.body())) { const refAddress = refToAddress.get(ref); if (refAddress != null) { - this._addReference(srcAddress, refAddress); + switch (refType) { + case "BASIC": + this._addReference(srcAddress, refAddress); + break; + case "PAIRED_WITH": + if (refAddress.type !== N.USERLIKE_TYPE) { + throw new Error( + `Invariant error: @-ref did not refer to userlike: ${stringify( + refAddress + )}` + ); + } + const userlike = this.userlike(refAddress); + if (userlike == null) { + throw new Error( + `Invariant error: nonexistent reference: ${stringify( + refAddress + )}` + ); + } + this._addExtraAuthor(e, userlike); + break; + default: + // eslint-disable-next-line no-unused-expressions + (refType: empty); + throw new Error(`Unexpected refType: ${refType}`); + } } } } @@ -372,6 +398,15 @@ export class RelationalView { } } + _addExtraAuthor(e: AuthoredEntity, extraAuthor: Userlike) { + for (const existingAuthor of e.authors()) { + if (existingAuthor.login() === extraAuthor.login()) { + return; // user can't author the same thing twice + } + } + e._entry.authors.push(extraAuthor.address()); + } + *_referencedBy(e: ReferentEntity): Iterator { const refs = this._mapReferencedBy.get(N.toRaw(e.address())); if (refs == null) { @@ -520,7 +555,7 @@ type IssueEntry = {| +body: string, +url: string, +comments: CommentAddress[], - +nominalAuthor: ?UserlikeAddress, + +authors: UserlikeAddress[], |}; export class Issue extends _Entity { @@ -566,9 +601,9 @@ type PullEntry = {| +comments: CommentAddress[], +reviews: ReviewAddress[], +mergedAs: ?GitNode.CommitAddress, - +nominalAuthor: ?UserlikeAddress, +additions: number, +deletions: number, + +authors: UserlikeAddress[], |}; export class Pull extends _Entity { @@ -627,7 +662,7 @@ type ReviewEntry = {| +url: string, +comments: CommentAddress[], +state: Q.ReviewState, - +nominalAuthor: ?UserlikeAddress, + +authors: UserlikeAddress[], |}; export class Review extends _Entity { @@ -666,7 +701,7 @@ type CommentEntry = {| +address: CommentAddress, +body: string, +url: string, - +nominalAuthor: ?UserlikeAddress, + +authors: UserlikeAddress[], |}; export class Comment extends _Entity { @@ -737,8 +772,7 @@ function* getAuthors( view: RelationalView, entry: IssueEntry | PullEntry | ReviewEntry | CommentEntry ) { - const address = entry.nominalAuthor; - if (address != null) { + for (const address of entry.authors) { const author = view.userlike(address); yield assertExists(author, address); } diff --git a/src/v3/plugins/github/relationalView.test.js b/src/v3/plugins/github/relationalView.test.js index c2d089d..baa1e1c 100644 --- a/src/v3/plugins/github/relationalView.test.js +++ b/src/v3/plugins/github/relationalView.test.js @@ -173,6 +173,17 @@ describe("plugins/github/relationalView", () => { hasCorrectParent("review", review); }); + it("paired with edges", () => { + const issue10 = Array.from(view.issues()).find((x) => x.number() === "10"); + if (issue10 == null) { + throw new Error(`Unable to find issue #10`); + } + expect(Array.from(issue10.authors()).map((x) => x.login())).toEqual([ + "decentralion", + "wchargin", + ]); + }); + describe("reference detection", () => { // create url->url reference maps, for convenient snapshot readability const allReferences: Map> = new Map();