From 41cdf2d855c75c48aa81f6cf08fb4e6c2ac1fff2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dandelion=20Man=C3=A9?=
Date: Tue, 20 Mar 2018 18:10:03 -0700
Subject: [PATCH] Implement GitHub reference detection (#98)

This commit only adds logic for finding references in GitHub posts,
either by #-numeric reference, or explicit urls. Adding the reference
edges to the graph will occur in a follow-on commit.

Test plan:
New unit tests are included
---
Review note: the blob ids on the "index" lines below predate the review
edits made to this patch and have not been recomputed.

 src/plugins/github/parseReferences.js      |  97 ++++++++++++
 src/plugins/github/parseReferences.test.js | 189 +++++++++++++++++++++++
 2 files changed, 286 insertions(+)
 create mode 100644 src/plugins/github/parseReferences.js
 create mode 100644 src/plugins/github/parseReferences.test.js

diff --git a/src/plugins/github/parseReferences.js b/src/plugins/github/parseReferences.js
new file mode 100644
index 0000000..41d0fae
--- /dev/null
+++ b/src/plugins/github/parseReferences.js
@@ -0,0 +1,97 @@
+// @flow
+
+/**
+ * Collect every match of the global regexp `re` in `s`, in order.
+ *
+ * Each entry is the array returned by `re.exec`, so capture groups are
+ * available by index. Throws if `re` lacks the `g` flag, since `exec`
+ * would then return the same first match on every call.
+ */
+function findAllMatches(re: RegExp, s: string): any[] {
+  // modified from: https://stackoverflow.com/a/6323598
+  if (!re.global) {
+    throw new Error("findAllMatches requires a regexp with the g flag");
+  }
+  // Start from the beginning even if the caller reuses a regexp object.
+  re.lastIndex = 0;
+  const matches = [];
+  let m = re.exec(s);
+  while (m != null) {
+    matches.push(m);
+    if (m[0].length === 0) {
+      // A zero-length match does not advance lastIndex; step past it.
+      re.lastIndex++;
+    }
+    m = re.exec(s);
+  }
+  return matches;
+}
+
+/**
+ * Find `#123`-style references in `body`; returns the referenced numbers
+ * in order of appearance.
+ *
+ * A reference must not touch a word character on either side. The trailing
+ * boundary is a lookahead so that it is not consumed: a single separator
+ * (as in "#1 #2") can then end one reference and begin the next.
+ */
+export function findNumericReferences(body: string): number[] {
+  return findAllMatches(/(?:\W|^)#(\d+)(?=\W|$)/g, body).map((x) => +x[1]);
+}
+
+export type GithubUrlMatch = {|
+  +repoName: string,
+  +repoOwner: string,
+  +parentType: "pull" | "issues",
+  +number: number,
+  +commentFragment: ?{|
+    +fragmentType:
+      | "issue" // a directly linked issue or pull request
+      | "issuecomment" // a directly linked regular comment on issue or pull request
+      | "pullrequestreview" // a pull request review
+      | "discussion_r", // a review comment as part of a pull request review
+    +fragmentNumber: number,
+  |},
+|};
+
+/**
+ * Find explicit GitHub issue and pull request urls in `body`.
+ *
+ * Matches http(s)://github.com/OWNER/REPO/(issues|pull)/NUMBER, optionally
+ * followed by a fragment such as #issuecomment-N or #discussion_rN. The
+ * url must not touch a word character on either side. `commentFragment`
+ * is null when no recognized fragment is present.
+ */
+export function findGithubUrlReferences(body: string): GithubUrlMatch[] {
+  const githubNamePart = /([a-zA-Z0-9_-]+)/.source;
+  const urlRegex = new RegExp(
+    "" +
+      // The dot is escaped so that only the literal host github.com matches.
+      /(?:\W|^)http(?:s)?:\/\/github\.com\//.source +
+      githubNamePart +
+      /\//.source +
+      githubNamePart +
+      /\/(issues|pull)\//.source +
+      /(\d+)/.source +
+      /(#(issue|issuecomment|pullrequestreview|discussion_r)-?(\d+))?/.source +
+      // Lookahead: leave the separator for the next url's leading boundary.
+      /(?=\W|$)/.source,
+    "gm"
+  );
+  return findAllMatches(urlRegex, body).map((match) => {
+    let commentFragment: $ElementType<GithubUrlMatch, "commentFragment">;
+    if (match[5] != null) {
+      // we found a comment fragment
+      commentFragment = {fragmentType: match[6], fragmentNumber: +match[7]};
+    } else {
+      commentFragment = null;
+    }
+    return {
+      repoOwner: match[1],
+      repoName: match[2],
+      parentType: match[3],
+      number: +match[4],
+      commentFragment,
+    };
+  });
+}
diff --git a/src/plugins/github/parseReferences.test.js b/src/plugins/github/parseReferences.test.js
new file mode 100644
index 0000000..a6d51a9
--- /dev/null
+++ b/src/plugins/github/parseReferences.test.js
@@ -0,0 +1,189 @@
+// @flow
+
+import {
+  findNumericReferences,
+  findGithubUrlReferences,
+} from "./parseReferences.js";
+import type {GithubUrlMatch} from "./parseReferences.js";
+
+describe("reference finding", () => {
+  it("finds no numeric references when not present", () => {
+    expect(findNumericReferences("foo bar bod boink")).toHaveLength(0);
+    expect(findNumericReferences("")).toHaveLength(0);
+  });
+
+  it("finds trivial numeric references", () => {
+    expect(findNumericReferences("#1, #2, and #3")).toEqual([1, 2, 3]);
+  });
+
+  it("finds numeric references separated by a single character", () => {
+    expect(findNumericReferences("#1 #2\n#3")).toEqual([1, 2, 3]);
+  });
+
+  it("finds numeric references in a multiline string", () => {
+    const example = `
+    This is a multiline string.
+    It refers to #1. Oh, and to #2 too.
+    (#42 might be included too - who knows?)`;
+    expect(findNumericReferences(example)).toEqual([1, 2, 42]);
+  });
+
+  it("does not find bad references", () => {
+    expect(findNumericReferences("foo#123 #124bar")).toHaveLength(0);
+  });
+
+  it("does not yet find concise cross-repo links", () => {
+    // The link below is valid, when we add cross-repo support we
+    // should fix this test case
+    expect(findNumericReferences("sourcecred/sourcecred#12")).toHaveLength(0);
+  });
+
+  it("finds no url references when not present", () => {
+    expect(findGithubUrlReferences("foo bar bod boink")).toHaveLength(0);
+    expect(findGithubUrlReferences("")).toHaveLength(0);
+  });
+
+  it("finds a trivial url reference", () => {
+    expect(
+      findGithubUrlReferences(
+        "https://github.com/sourcecred/sourcecred/issues/86"
+      )
+    ).toHaveLength(1);
+  });
+
+  it("finds url references separated by a single character", () => {
+    const example =
+      "https://github.com/sourcecred/sourcecred/issues/1 " +
+      "https://github.com/sourcecred/sourcecred/pull/2";
+    expect(findGithubUrlReferences(example)).toHaveLength(2);
+  });
+
+  it("requires the literal github.com host", () => {
+    expect(
+      findGithubUrlReferences(
+        "https://githubxcom/sourcecred/sourcecred/issues/86"
+      )
+    ).toHaveLength(0);
+  });
+
+  it("parses url references appropriately", () => {
+    const example = `
+    A directly linked issue:
+https://github.com/sourcecred/example-repo/issues/1
+
+    A directly linked issue with fragment:
+https://github.com/sourcecred/example-repo/issues/1#issue-300934818
+
+    A directly linked pull request:
+https://github.com/sourcecred/example-repo/pull/3
+
+    A directly linked pull request with fragment:
+https://github.com/sourcecred/example-repo/pull/3#issue-171887741
+
+    A directly linked issue comment:
+https://github.com/sourcecred/example-repo/issues/6#issuecomment-373768442
+
+    A directly linked pull request review:
+https://github.com/sourcecred/example-repo/pull/5#pullrequestreview-100313899
+
+    A directly linked pull request review comment:
+https://github.com/sourcecred/example-repo/pull/5#discussion_r171460198
+
+    A directly linked pull request comment:
+https://github.com/sourcecred/example-repo/pull/3#issuecomment-369162222
+    `;
+
+    const expected = [
+      {
+        repoName: "example-repo",
+        repoOwner: "sourcecred",
+        parentType: "issues",
+        number: 1,
+        commentFragment: null,
+      },
+      {
+        repoName: "example-repo",
+        repoOwner: "sourcecred",
+        parentType: "issues",
+        number: 1,
+        commentFragment: {fragmentType: "issue", fragmentNumber: 300934818},
+      },
+      {
+        repoName: "example-repo",
+        repoOwner: "sourcecred",
+        parentType: "pull",
+        number: 3,
+        commentFragment: null,
+      },
+      {
+        repoName: "example-repo",
+        repoOwner: "sourcecred",
+        parentType: "pull",
+        number: 3,
+        commentFragment: {fragmentType: "issue", fragmentNumber: 171887741},
+      },
+      {
+        repoName: "example-repo",
+        repoOwner: "sourcecred",
+        parentType: "issues",
+        number: 6,
+        commentFragment: {
+          fragmentType: "issuecomment",
+          fragmentNumber: 373768442,
+        },
+      },
+      {
+        repoName: "example-repo",
+        repoOwner: "sourcecred",
+        parentType: "pull",
+        number: 5,
+        commentFragment: {
+          fragmentType: "pullrequestreview",
+          fragmentNumber: 100313899,
+        },
+      },
+      {
+        repoName: "example-repo",
+        repoOwner: "sourcecred",
+        parentType: "pull",
+        number: 5,
+        commentFragment: {
+          fragmentType: "discussion_r",
+          fragmentNumber: 171460198,
+        },
+      },
+      {
+        repoName: "example-repo",
+        repoOwner: "sourcecred",
+        parentType: "pull",
+        number: 3,
+        commentFragment: {
+          fragmentType: "issuecomment",
+          fragmentNumber: 369162222,
+        },
+      },
+    ];
+
+    expect(findGithubUrlReferences(example)).toEqual(expected);
+  });
+
+  it("doesn't find urls mangled with word characters", () => {
+    expect(
+      findGithubUrlReferences(
+        "foohttps://github.com/sourcecred/sourcecred/pull/94"
+      )
+    ).toHaveLength(0);
+
+    expect(
+      findGithubUrlReferences(
+        "https://github.com/sourcecred/sourcecred/pull/94foo"
+      )
+    ).toHaveLength(0);
+
+    expect(
+      findGithubUrlReferences(
+        "(https://github.com/sourcecred/sourcecred/pull/94)"
+      )
+    ).toHaveLength(1);
+  });
+});