Modify parseReferences to detect refs to commits (#832)
We add a new function, `findCommitReferences`, which can find both explicit url references to commits, and commit hashes. Since the commit url includes the commit hash, some extra logic is added to deduplicate them in this instance. Tests verify that this is done properly. Test plan: Unit tests cover the cases of having commit hashes, having commit urls, and having both at once.
This commit is contained in:
parent
417cc231e9
commit
c9a0d4b1b8
|
@ -29,6 +29,7 @@ function parseReferencesFromRawString(textBlock: string): ParsedReference[] {
|
||||||
...findRepoNumericReferences(textBlock),
|
...findRepoNumericReferences(textBlock),
|
||||||
...findGithubUrlReferences(textBlock),
|
...findGithubUrlReferences(textBlock),
|
||||||
...findUsernameReferences(textBlock),
|
...findUsernameReferences(textBlock),
|
||||||
|
...findCommitReferences(textBlock),
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,6 +99,47 @@ function findGithubUrlReferences(textBlock: string): ParsedReference[] {
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function findCommitReferences(textBlock: string): ParsedReference[] {
|
||||||
|
const hashReferences = findAllMatches(
|
||||||
|
/(?:\W|^)([a-fA-F0-9]{40,})(?=\W|$)/gm,
|
||||||
|
textBlock
|
||||||
|
).map((x) => ({
|
||||||
|
refType: "BASIC",
|
||||||
|
ref: x[1].toLowerCase(),
|
||||||
|
}));
|
||||||
|
const commitUrlRegex = new RegExp(
|
||||||
|
"" +
|
||||||
|
/(?:\W|^)/.source +
|
||||||
|
"(" +
|
||||||
|
/https?:\/\/github.com\//.source +
|
||||||
|
`(?:${githubOwnerPattern})` +
|
||||||
|
/\//.source +
|
||||||
|
`(?:${githubRepoPattern})` +
|
||||||
|
/\/commit\/([a-fA-F0-9]{40,})/.source +
|
||||||
|
")" +
|
||||||
|
/(?=[^\w/]|$)/.source,
|
||||||
|
"gm"
|
||||||
|
);
|
||||||
|
const urlsAndHashes = findAllMatches(commitUrlRegex, textBlock).map((x) => ({
|
||||||
|
url: x[1].toLowerCase(),
|
||||||
|
hash: x[2].toLowerCase(),
|
||||||
|
}));
|
||||||
|
// Every urlReference will also generate a hash reference (because the
|
||||||
|
// url contains the hash). So we manually remove the duplicates.
|
||||||
|
for (const {url, hash} of urlsAndHashes) {
|
||||||
|
const idxToRemove = hashReferences.findIndex(({ref}) => ref === hash);
|
||||||
|
if (idxToRemove === -1) {
|
||||||
|
throw new Error(`No matching hash for url reference: ${url}`);
|
||||||
|
}
|
||||||
|
hashReferences.splice(idxToRemove, 1);
|
||||||
|
}
|
||||||
|
const urlReferences = urlsAndHashes.map((x) => ({
|
||||||
|
refType: "BASIC",
|
||||||
|
ref: x.url,
|
||||||
|
}));
|
||||||
|
return [...hashReferences, ...urlReferences];
|
||||||
|
}
|
||||||
|
|
||||||
function findAllMatches(re: RegExp, s: string): any[] {
|
function findAllMatches(re: RegExp, s: string): any[] {
|
||||||
// modified from: https://stackoverflow.com/a/6323598
|
// modified from: https://stackoverflow.com/a/6323598
|
||||||
let m;
|
let m;
|
||||||
|
|
|
@ -109,6 +109,9 @@ https://github.com/sourcecred/exa_mple-git.hub/pull/5#discussion_r171460198
|
||||||
|
|
||||||
A directly linked pull request comment:
|
A directly linked pull request comment:
|
||||||
https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222
|
https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222
|
||||||
|
|
||||||
|
A directly linked commit:
|
||||||
|
https://github.com/sourcecred/exa_mple-git.hub/commit/6bd1b4c0b719c22c688a74863be07a699b7b9b34
|
||||||
`;
|
`;
|
||||||
|
|
||||||
const expected = [
|
const expected = [
|
||||||
|
@ -150,6 +153,11 @@ https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222
|
||||||
ref:
|
ref:
|
||||||
"https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222",
|
"https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
refType: "BASIC",
|
||||||
|
ref:
|
||||||
|
"https://github.com/sourcecred/exa_mple-git.hub/commit/6bd1b4c0b719c22c688a74863be07a699b7b9b34",
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
expect(parseReferences(example)).toEqual(expected);
|
expect(parseReferences(example)).toEqual(expected);
|
||||||
|
@ -182,6 +190,56 @@ https://github.com/sourcecred/exa_mple-git.hub/pull/3#issuecomment-369162222
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("commit references", () => {
|
||||||
|
const commitHash = "6bd1b4c0b719c22c688a74863be07a699b7b9b34";
|
||||||
|
const urlPrefix = "https://github.com/sourcecred/example-github/commit/";
|
||||||
|
it("are valid if they are 40-chars long and all hex", () => {
|
||||||
|
expect(parseReferences(commitHash)).toEqual([
|
||||||
|
{refType: "BASIC", ref: commitHash},
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
it("are canonicalized to lower-case capitalization", () => {
|
||||||
|
const capitalizedHash = "6BD1B4C0B719C22C688A74863be07a699b7b9b34";
|
||||||
|
const capitalizedUrl = `${urlPrefix}${capitalizedHash}`;
|
||||||
|
expect(parseReferences(capitalizedHash)).toEqual([
|
||||||
|
{refType: "BASIC", ref: commitHash},
|
||||||
|
]);
|
||||||
|
expect(parseReferences(capitalizedUrl)).toEqual([
|
||||||
|
{refType: "BASIC", ref: `${urlPrefix}${commitHash}`},
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
it("are valid even if they have surrounding text", () => {
|
||||||
|
expect(parseReferences(`hello ${commitHash} world`)).toEqual([
|
||||||
|
{refType: "BASIC", ref: commitHash},
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
it("are not valid if fewer than 40 chars long", () => {
|
||||||
|
expect(parseReferences(`bad: ${commitHash.slice(0, 38)}`)).toHaveLength(
|
||||||
|
0
|
||||||
|
);
|
||||||
|
});
|
||||||
|
it("are valid if they are 64 characters long", () => {
|
||||||
|
const hash64 =
|
||||||
|
"0000000000000000000000000000000000000000000000000000000000000000";
|
||||||
|
expect(parseReferences(hash64)).toEqual([
|
||||||
|
{ref: hash64, refType: "BASIC"},
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
it("are not valid if contain non-hex characters", () => {
|
||||||
|
expect(
|
||||||
|
parseReferences("6bd1b4c0b719c22c688a74863be07a699b7b9b3X")
|
||||||
|
).toHaveLength(0);
|
||||||
|
});
|
||||||
|
it("if there is a ref to the url and hash, they are both found once", () => {
|
||||||
|
const commitUrl = `https://github.com/sourcecred/example-github/commit/${commitHash}`;
|
||||||
|
const text = `${commitHash} ${commitUrl}`;
|
||||||
|
const result = parseReferences(text);
|
||||||
|
const hashRef = {refType: "BASIC", ref: commitHash};
|
||||||
|
const urlRef = {refType: "BASIC", ref: commitUrl};
|
||||||
|
expect(result).toEqual([hashRef, urlRef]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it("finds username references", () => {
|
it("finds username references", () => {
|
||||||
expect(parseReferences("hello to @wchargin from @decentralion!")).toEqual([
|
expect(parseReferences("hello to @wchargin from @decentralion!")).toEqual([
|
||||||
{refType: "BASIC", ref: "@wchargin"},
|
{refType: "BASIC", ref: "@wchargin"},
|
||||||
|
|
Loading…
Reference in New Issue