From 41cdf2d855c75c48aa81f6cf08fb4e6c2ac1fff2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dandelion=20Man=C3=A9?=
Date: Tue, 20 Mar 2018 18:10:03 -0700
Subject: [PATCH] Implement GitHub reference detection (#98)
This commit only adds logic for finding references in GitHub posts,
either by #-numeric reference or by explicit URL. Adding the reference
edges to the graph will occur in a follow-on commit.
Test plan: New unit tests are included.
---
src/plugins/github/parseReferences.js | 65 ++++++++
src/plugins/github/parseReferences.test.js | 170 +++++++++++++++++++++
2 files changed, 235 insertions(+)
create mode 100644 src/plugins/github/parseReferences.js
create mode 100644 src/plugins/github/parseReferences.test.js
diff --git a/src/plugins/github/parseReferences.js b/src/plugins/github/parseReferences.js
new file mode 100644
index 0000000..41d0fae
--- /dev/null
+++ b/src/plugins/github/parseReferences.js
@@ -0,0 +1,98 @@
+// @flow
+
+/**
+ * Return every match of `re` in `s`, in order of appearance.
+ *
+ * The search runs on a private copy of `re` that always carries the
+ * `g` flag, so a non-global regex cannot make the loop spin forever and
+ * the caller's `lastIndex` is never read or modified.
+ */
+function findAllMatches(re: RegExp, s: string): any[] {
+  // modified from: https://stackoverflow.com/a/6323598
+  const flags = re.flags.includes("g") ? re.flags : re.flags + "g";
+  const scanner = new RegExp(re.source, flags);
+  const matches = [];
+  let m = scanner.exec(s);
+  while (m != null) {
+    matches.push(m);
+    if (m[0] === "") {
+      // A zero-length match does not advance `lastIndex`; step past it
+      // by hand so the scan always terminates.
+      scanner.lastIndex += 1;
+    }
+    m = scanner.exec(s);
+  }
+  return matches;
+}
+
+/**
+ * Find `#123`-style references in `body` and return their numbers in
+ * order of appearance.
+ *
+ * A reference only counts when the `#` is at the start of the text or
+ * preceded by a non-word character, and the digits are followed by a
+ * non-word character or the end of the text.
+ */
+export function findNumericReferences(body: string): number[] {
+  // The trailing boundary is a lookahead so that it is not consumed: one
+  // separator character can then end a reference and start the next
+  // one, as in "#1 #2".
+  const numericRefRegex = /(?:\W|^)#(\d+)(?=\W|$)/g;
+  return findAllMatches(numericRefRegex, body).map((x) => +x[1]);
+}
+
+export type GithubUrlMatch = {|
+  +repoName: string,
+  +repoOwner: string,
+  +parentType: "pull" | "issues",
+  +number: number,
+  +commentFragment: ?{|
+    +fragmentType:
+      | "issue" // a directly linked issue or pull request
+      | "issuecomment" // a directly linked regular comment on issue or pull request
+      | "pullrequestreview" // a pull request review
+      | "discussion_r", // a review comment as part of a pull request review
+    +fragmentNumber: number,
+  |},
+|};
+
+/**
+ * Find links to GitHub issues and pull requests in `body`, including
+ * links that carry a comment/review fragment, in order of appearance.
+ *
+ * A URL only counts when it is delimited on both sides by a non-word
+ * character or by the start/end of a line.
+ */
+export function findGithubUrlReferences(body: string): GithubUrlMatch[] {
+  const githubNamePart = /([a-zA-Z0-9_-]+)/.source;
+  const urlRegex = new RegExp(
+    "" +
+      // The dot is escaped so that only the literal host github.com matches.
+      /(?:\W|^)https?:\/\/github\.com\//.source +
+      githubNamePart + // 1: repository owner
+      /\//.source +
+      githubNamePart + // 2: repository name
+      /\/(issues|pull)\//.source + // 3: parent type
+      /(\d+)/.source + // 4: issue or pull request number
+      // 5: whole fragment, 6: fragment type, 7: fragment number
+      /(#(issue|issuecomment|pullrequestreview|discussion_r)-?(\d+))?/.source +
+      // Lookahead rather than a consuming group, so the delimiter stays
+      // available as the leading boundary of an immediately following URL.
+      /(?=\W|$)/.source,
+    "gm"
+  );
+  return findAllMatches(urlRegex, body).map((match) => {
+    // Group 5 is only set when the URL carried a recognized fragment.
+    const commentFragment: $ElementType<GithubUrlMatch, "commentFragment"> =
+      match[5] != null
+        ? {fragmentType: match[6], fragmentNumber: +match[7]}
+        : null;
+    return {
+      repoOwner: match[1],
+      repoName: match[2],
+      parentType: match[3],
+      number: +match[4],
+      commentFragment,
+    };
+  });
+}
diff --git a/src/plugins/github/parseReferences.test.js b/src/plugins/github/parseReferences.test.js
new file mode 100644
index 0000000..a6d51a9
--- /dev/null
+++ b/src/plugins/github/parseReferences.test.js
@@ -0,0 +1,170 @@
+// @flow
+
+import {
+ findNumericReferences,
+ findGithubUrlReferences,
+} from "./parseReferences.js";
+import type {GithubUrlMatch} from "./parseReferences.js";
+
+describe("reference finding", () => {
+ it("finds no numeric references when not present", () => {
+ expect(findNumericReferences("foo bar bod boink")).toHaveLength(0);
+ expect(findNumericReferences("")).toHaveLength(0);
+ });
+
+ it("finds trivial numeric references", () => {
+ expect(findNumericReferences("#1, #2, and #3")).toEqual([1, 2, 3]);
+ });
+
+ it("finds numeric references in a multiline string", () => {
+ const example = `
+ This is a multiline string.
+ It refers to #1. Oh, and to #2 too.
+ (#42 might be included too - who knows?)`;
+ expect(findNumericReferences(example)).toEqual([1, 2, 42]);
+ });
+ // A "#" glued to a word character, or digits followed by one, is not a reference.
+ it("does not find bad references", () => {
+ expect(findNumericReferences("foo#123 #124bar")).toHaveLength(0);
+ });
+
+ it("does not yet find concise cross-repo links", () => {
+ // The link below is a valid cross-repo reference. Once cross-repo
+ // support is added, this test case should be updated to expect it.
+ expect(findNumericReferences("sourcecred/sourcecred#12")).toHaveLength(0);
+ });
+
+ it("finds no url references when not present", () => {
+ expect(findGithubUrlReferences("foo bar bod boink")).toHaveLength(0);
+ expect(findGithubUrlReferences("")).toHaveLength(0);
+ });
+
+ it("finds a trivial url reference", () => {
+ expect(
+ findGithubUrlReferences(
+ "https://github.com/sourcecred/sourcecred/issues/86"
+ )
+ ).toHaveLength(1);
+ });
+ // Exercises issue and pull request links, both with and without a fragment.
+ it("parses url references appropriately", () => {
+ const example = `
+ A directly linked issue:
+https://github.com/sourcecred/example-repo/issues/1
+
+ A directly linked issue with fragment:
+https://github.com/sourcecred/example-repo/issues/1#issue-300934818
+
+ A directly linked pull request:
+https://github.com/sourcecred/example-repo/pull/3
+
+ A directly linked pull request with fragment:
+https://github.com/sourcecred/example-repo/pull/3#issue-171887741
+
+ A directly linked issue comment:
+https://github.com/sourcecred/example-repo/issues/6#issuecomment-373768442
+
+ A directly linked pull request review:
+https://github.com/sourcecred/example-repo/pull/5#pullrequestreview-100313899
+
+ A directly linked pull request review comment:
+https://github.com/sourcecred/example-repo/pull/5#discussion_r171460198
+
+ A directly linked pull request comment:
+https://github.com/sourcecred/example-repo/pull/3#issuecomment-369162222
+ `;
+ // One expected entry per URL above, in the order the URLs appear.
+ const expected = [
+ {
+ repoName: "example-repo",
+ repoOwner: "sourcecred",
+ parentType: "issues",
+ number: 1,
+ commentFragment: null,
+ },
+ {
+ repoName: "example-repo",
+ repoOwner: "sourcecred",
+ parentType: "issues",
+ number: 1,
+ commentFragment: {fragmentType: "issue", fragmentNumber: 300934818},
+ },
+ {
+ repoName: "example-repo",
+ repoOwner: "sourcecred",
+ parentType: "pull",
+ number: 3,
+ commentFragment: null,
+ },
+ {
+ repoName: "example-repo",
+ repoOwner: "sourcecred",
+ parentType: "pull",
+ number: 3,
+ commentFragment: {fragmentType: "issue", fragmentNumber: 171887741},
+ },
+ {
+ repoName: "example-repo",
+ repoOwner: "sourcecred",
+ parentType: "issues",
+ number: 6,
+ commentFragment: {
+ fragmentType: "issuecomment",
+ fragmentNumber: 373768442,
+ },
+ },
+ {
+ repoName: "example-repo",
+ repoOwner: "sourcecred",
+ parentType: "pull",
+ number: 5,
+ commentFragment: {
+ fragmentType: "pullrequestreview",
+ fragmentNumber: 100313899,
+ },
+ },
+ {
+ repoName: "example-repo",
+ repoOwner: "sourcecred",
+ parentType: "pull",
+ number: 5,
+ commentFragment: {
+ fragmentType: "discussion_r",
+ fragmentNumber: 171460198,
+ },
+ },
+ {
+ repoName: "example-repo",
+ repoOwner: "sourcecred",
+ parentType: "pull",
+ number: 3,
+ commentFragment: {
+ fragmentType: "issuecomment",
+ fragmentNumber: 369162222,
+ },
+ },
+ ];
+
+ expect(findGithubUrlReferences(example)).toEqual(expected);
+ });
+
+ it("doesn't find urls mangled with word characters", () => {
+ expect(
+ findGithubUrlReferences(
+ "foohttps://github.com/sourcecred/sourcecred/pull/94"
+ )
+ ).toHaveLength(0);
+ // Word characters directly after the number also invalidate the URL.
+ expect(
+ findGithubUrlReferences(
+ "https://github.com/sourcecred/sourcecred/pull/94foo"
+ )
+ ).toHaveLength(0);
+ // Punctuation around the URL is fine: parentheses are not word characters.
+ expect(
+ findGithubUrlReferences(
+ "(https://github.com/sourcecred/sourcecred/pull/94)"
+ )
+ ).toHaveLength(1);
+ });
+});