From c9a0d4b1b87d27a25252a1fb049213409dfe2fc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Thu, 13 Sep 2018 18:21:06 -0700 Subject: [PATCH] Modify parseReferences to detect refs to commits (#832) We add a new function, `findCommitReferences`, which can find both explicit url references to commits, and commit hashes. Since the commit url includes the commit hash, some extra logic is added to deduplicate them in this instance. Tests verify that this is done properly. Test plan: Unit tests cover the cases of having commit hashes, having commit urls, and having both at once. --- src/plugins/github/parseReferences.js | 42 ++++++++++++++++ src/plugins/github/parseReferences.test.js | 58 ++++++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/src/plugins/github/parseReferences.js b/src/plugins/github/parseReferences.js index 919399a..5c2d307 100644 --- a/src/plugins/github/parseReferences.js +++ b/src/plugins/github/parseReferences.js @@ -29,6 +29,7 @@ function parseReferencesFromRawString(textBlock: string): ParsedReference[] { ...findRepoNumericReferences(textBlock), ...findGithubUrlReferences(textBlock), ...findUsernameReferences(textBlock), + ...findCommitReferences(textBlock), ]; } @@ -98,6 +99,47 @@ function findGithubUrlReferences(textBlock: string): ParsedReference[] { })); } +function findCommitReferences(textBlock: string): ParsedReference[] { + const hashReferences = findAllMatches( + /(?:\W|^)([a-fA-F0-9]{40,})(?=\W|$)/gm, + textBlock + ).map((x) => ({ + refType: "BASIC", + ref: x[1].toLowerCase(), + })); + const commitUrlRegex = new RegExp( + "" + + /(?:\W|^)/.source + + "(" + + /https?:\/\/github.com\//.source + + `(?:${githubOwnerPattern})` + + /\//.source + + `(?:${githubRepoPattern})` + + /\/commit\/([a-fA-F0-9]{40,})/.source + + ")" + + /(?=[^\w/]|$)/.source, + "gm" + ); + const urlsAndHashes = findAllMatches(commitUrlRegex, textBlock).map((x) => ({ + url: x[1].toLowerCase(), + hash: x[2].toLowerCase(), + })); + // Every urlReference will also generate a hash reference (because the + // url contains the hash). So we manually remove the duplicates. + for (const {url, hash} of urlsAndHashes) { + const idxToRemove = hashReferences.findIndex(({ref}) => ref === hash); + if (idxToRemove === -1) { + throw new Error(`No matching hash for url reference: ${url}`); + } + hashReferences.splice(idxToRemove, 1); + } + const urlReferences = urlsAndHashes.map((x) => ({ + refType: "BASIC", + ref: x.url, + })); + return [...hashReferences, ...urlReferences]; +} + function findAllMatches(re: RegExp, s: string): any[] { // modified from: https://stackoverflow.com/a/6323598 let m; diff --git a/src/plugins/github/parseReferences.test.js b/src/plugins/github/parseReferences.test.js index 93a38ed..f2bb407 100644 --- a/src/plugins/github/parseReferences.test.js +++ b/src/plugins/github/parseReferences.test.js @@ -109,6 +109,9 @@ https://github.com/sourcecred/exa_mple-git.hub/pull/5#discussion_r171460198 A directly linked pull request comment: https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222 + + A directly linked commit: +https://github.com/sourcecred/exa_mple-git.hub/commit/6bd1b4c0b719c22c688a74863be07a699b7b9b34 `; const expected = [ @@ -150,6 +153,11 @@ https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222 ref: "https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222", }, + { + refType: "BASIC", + ref: + "https://github.com/sourcecred/exa_mple-git.hub/commit/6bd1b4c0b719c22c688a74863be07a699b7b9b34", + }, ]; expect(parseReferences(example)).toEqual(expected); @@ -182,6 +190,56 @@ https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222 ]); }); + describe("commit references", () => { + const commitHash = "6bd1b4c0b719c22c688a74863be07a699b7b9b34"; + const urlPrefix = "https://github.com/sourcecred/example-github/commit/"; + it("are valid if they are 40-chars long and all hex", () => { + expect(parseReferences(commitHash)).toEqual([ + {refType: "BASIC", ref: commitHash}, + ]); + }); + it("are canonicalized to lower-case capitalization", () => { + const capitalizedHash = "6BD1B4C0B719C22C688A74863be07a699b7b9b34"; + const capitalizedUrl = `${urlPrefix}${capitalizedHash}`; + expect(parseReferences(capitalizedHash)).toEqual([ + {refType: "BASIC", ref: commitHash}, + ]); + expect(parseReferences(capitalizedUrl)).toEqual([ + {refType: "BASIC", ref: `${urlPrefix}${commitHash}`}, + ]); + }); + it("are valid even if they have surrounding text", () => { + expect(parseReferences(`hello ${commitHash} world`)).toEqual([ + {refType: "BASIC", ref: commitHash}, + ]); + }); + it("are not valid if fewer than 40 chars long", () => { + expect(parseReferences(`bad: ${commitHash.slice(0, 38)}`)).toHaveLength( + 0 + ); + }); + it("are valid if they are 64 characters long", () => { + const hash64 = + "0000000000000000000000000000000000000000000000000000000000000000"; + expect(parseReferences(hash64)).toEqual([ + {ref: hash64, refType: "BASIC"}, + ]); + }); + it("are not valid if contain non-hex characters", () => { + expect( + parseReferences("6bd1b4c0b719c22c688a74863be07a699b7b9b3X") + ).toHaveLength(0); + }); + it("if there is a ref to the url and hash, they are both found once", () => { + const commitUrl = `https://github.com/sourcecred/example-github/commit/${commitHash}`; + const text = `${commitHash} ${commitUrl}`; + const result = parseReferences(text); + const hashRef = {refType: "BASIC", ref: commitHash}; + const urlRef = {refType: "BASIC", ref: commitUrl}; + expect(result).toEqual([hashRef, urlRef]); + }); + }); + it("finds username references", () => { expect(parseReferences("hello to @wchargin from @decentralion!")).toEqual([ {refType: "BASIC", ref: "@wchargin"},