Modify parseReferences to detect refs to commits (#832)

We add a new function, `findCommitReferences`, which can find both
explicit url references to commits, and commit hashes.

Since the commit url includes the commit hash, some extra logic is added
to deduplicate them in this instance. Tests verify that this is done
properly.

Test plan: Unit tests cover the cases of having commit hashes, having
commit urls, and having both at once.
This commit is contained in:
Dandelion Mané 2018-09-13 18:21:06 -07:00 committed by GitHub
parent 417cc231e9
commit c9a0d4b1b8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 100 additions and 0 deletions

View File

@ -29,6 +29,7 @@ function parseReferencesFromRawString(textBlock: string): ParsedReference[] {
...findRepoNumericReferences(textBlock),
...findGithubUrlReferences(textBlock),
...findUsernameReferences(textBlock),
...findCommitReferences(textBlock),
];
}
@ -98,6 +99,47 @@ function findGithubUrlReferences(textBlock: string): ParsedReference[] {
}));
}
function findCommitReferences(textBlock: string): ParsedReference[] {
const hashReferences = findAllMatches(
/(?:\W|^)([a-fA-F0-9]{40,})(?=\W|$)/gm,
textBlock
).map((x) => ({
refType: "BASIC",
ref: x[1].toLowerCase(),
}));
const commitUrlRegex = new RegExp(
"" +
/(?:\W|^)/.source +
"(" +
/https?:\/\/github.com\//.source +
`(?:${githubOwnerPattern})` +
/\//.source +
`(?:${githubRepoPattern})` +
/\/commit\/([a-fA-F0-9]{40,})/.source +
")" +
/(?=[^\w/]|$)/.source,
"gm"
);
const urlsAndHashes = findAllMatches(commitUrlRegex, textBlock).map((x) => ({
url: x[1].toLowerCase(),
hash: x[2].toLowerCase(),
}));
// Every urlReference will also generate a hash reference (because the
// url contains the hash). So we manually remove the duplicates.
for (const {url, hash} of urlsAndHashes) {
const idxToRemove = hashReferences.findIndex(({ref}) => ref === hash);
if (idxToRemove === -1) {
throw new Error(`No matching hash for url reference: ${url}`);
}
hashReferences.splice(idxToRemove, 1);
}
const urlReferences = urlsAndHashes.map((x) => ({
refType: "BASIC",
ref: x.url,
}));
return [...hashReferences, ...urlReferences];
}
function findAllMatches(re: RegExp, s: string): any[] {
// modified from: https://stackoverflow.com/a/6323598
let m;

View File

@ -109,6 +109,9 @@ https://github.com/sourcecred/exa_mple-git.hub/pull/5#discussion_r171460198
A directly linked pull request comment:
https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222
A directly linked commit:
https://github.com/sourcecred/exa_mple-git.hub/commit/6bd1b4c0b719c22c688a74863be07a699b7b9b34
`;
const expected = [
@ -150,6 +153,11 @@ https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222
ref:
"https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222",
},
{
refType: "BASIC",
ref:
"https://github.com/sourcecred/exa_mple-git.hub/commit/6bd1b4c0b719c22c688a74863be07a699b7b9b34",
},
];
expect(parseReferences(example)).toEqual(expected);
@ -182,6 +190,56 @@ https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222
]);
});
describe("commit references", () => {
const commitHash = "6bd1b4c0b719c22c688a74863be07a699b7b9b34";
const urlPrefix = "https://github.com/sourcecred/example-github/commit/";
it("are valid if they are 40-chars long and all hex", () => {
expect(parseReferences(commitHash)).toEqual([
{refType: "BASIC", ref: commitHash},
]);
});
it("are canonicalized to lower-case capitalization", () => {
const capitalizedHash = "6BD1B4C0B719C22C688A74863be07a699b7b9b34";
const capitalizedUrl = `${urlPrefix}${capitalizedHash}`;
expect(parseReferences(capitalizedHash)).toEqual([
{refType: "BASIC", ref: commitHash},
]);
expect(parseReferences(capitalizedUrl)).toEqual([
{refType: "BASIC", ref: `${urlPrefix}${commitHash}`},
]);
});
it("are valid even if they have surrounding text", () => {
expect(parseReferences(`hello ${commitHash} world`)).toEqual([
{refType: "BASIC", ref: commitHash},
]);
});
it("are not valid if fewer than 40 chars long", () => {
expect(parseReferences(`bad: ${commitHash.slice(0, 38)}`)).toHaveLength(
0
);
});
it("are valid if they are 64 characters long", () => {
const hash64 =
"0000000000000000000000000000000000000000000000000000000000000000";
expect(parseReferences(hash64)).toEqual([
{ref: hash64, refType: "BASIC"},
]);
});
it("are not valid if contain non-hex characters", () => {
expect(
parseReferences("6bd1b4c0b719c22c688a74863be07a699b7b9b3X")
).toHaveLength(0);
});
it("if there is a ref to the url and hash, they are both found once", () => {
const commitUrl = `https://github.com/sourcecred/example-github/commit/${commitHash}`;
const text = `${commitHash} ${commitUrl}`;
const result = parseReferences(text);
const hashRef = {refType: "BASIC", ref: commitHash};
const urlRef = {refType: "BASIC", ref: commitUrl};
expect(result).toEqual([hashRef, urlRef]);
});
});
it("finds username references", () => {
expect(parseReferences("hello to @wchargin from @decentralion!")).toEqual([
{refType: "BASIC", ref: "@wchargin"},