Properly detect space-separated GitHub references (#478)
Summary: The regular expressions used to detect GitHub references were of the form `/(?:\W|^)(things)(?:\W|$)/gm`, where the outer non-capturing groups were intended to enforce a word boundary constraint. However, this caused reference detection in strings like `"#1 #2"` to fail, because the putative matches would be `"#1 "` and `" #2"`, but these matches overlap, and the JavaScript RegExp API (like most such APIs) finds only non-overlapping matches. Therefore, in a string of space-separated references of the same kind, only every other reference would be detected. A solution is to use a positive lookahead instead of the second non-capturing group: i.e., `/(?:\W|$)/` becomes `/(?=\W|$)/`. (Ideally, the first non-capturing group would just be a lookbehind, but JavaScript doesn’t support those.) In some cases, using `\b` is a simpler solution. But this does not work in all cases: for instance, it works for repo-numeric references, but does not work for numeric references, because the presence of the hash means that there cannot be an immediately preceding word boundary. For consistency, I opted to use the lookahead solution in all cases, but any solution that passes tests is okay with me. Test Plan: Regression tests added. They fail before this patch and pass with it. wchargin-branch: fix-space-separated-github-references
This commit is contained in:
parent
6ab78b85da
commit
bc9e94b2a1
|
@ -20,7 +20,7 @@ export function parseReferences(body: string): ParsedReference[] {
|
|||
|
||||
function findRepoNumericReferences(body: string): ParsedReference[] {
|
||||
return findAllMatches(
|
||||
/(?:\W|^)([a-zA-Z0-9-]+\/[a-zA-Z0-9-]+#\d+)(?:\W|$)/gm,
|
||||
/(?:\W|^)([a-zA-Z0-9-]+\/[a-zA-Z0-9-]+#\d+)(?=\W|$)/gm,
|
||||
body
|
||||
).map((x) => ({
|
||||
refType: "BASIC",
|
||||
|
@ -29,7 +29,7 @@ function findRepoNumericReferences(body: string): ParsedReference[] {
|
|||
}
|
||||
|
||||
function findNumericReferences(body: string): ParsedReference[] {
|
||||
return findAllMatches(/(?:\W|^)(#\d+)(?:\W|$)/gm, body).map((x) => ({
|
||||
return findAllMatches(/(?:\W|^)(#\d+)(?=\W|$)/gm, body).map((x) => ({
|
||||
refType: "BASIC",
|
||||
ref: x[1],
|
||||
}));
|
||||
|
@ -37,11 +37,11 @@ function findNumericReferences(body: string): ParsedReference[] {
|
|||
|
||||
function findUsernameReferences(body: string): ParsedReference[] {
|
||||
const pairedWithRefs = findAllMatches(
|
||||
/(?:\W|^)(?:P|p)aired(?:-| )(?:w|W)ith:? (@[a-zA-Z0-9-]+)(?:\W|$)/gm,
|
||||
/(?:\W|^)(?:P|p)aired(?:-| )(?:w|W)ith:? (@[a-zA-Z0-9-]+)(?=\W|$)/gm,
|
||||
body
|
||||
).map((x) => ({ref: x[1], refType: "PAIRED_WITH"}));
|
||||
const basicRefs = findAllMatches(
|
||||
/(?:\W|^)(@[a-zA-Z0-9-]+)(?:\W|$)/gm,
|
||||
/(?:\W|^)(@[a-zA-Z0-9-]+)(?=\W|$)/gm,
|
||||
body
|
||||
).map((x) => ({ref: x[1], refType: "BASIC"}));
|
||||
for (const {ref} of pairedWithRefs) {
|
||||
|
@ -71,7 +71,7 @@ function findGithubUrlReferences(body: string): ParsedReference[] {
|
|||
.source +
|
||||
")?" +
|
||||
")" +
|
||||
/(?:[^\w/]|$)/.source,
|
||||
/(?=[^\w/]|$)/.source,
|
||||
"gm"
|
||||
);
|
||||
return findAllMatches(urlRegex, body).map((match) => ({
|
||||
|
|
|
@ -302,4 +302,62 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222
|
|||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("finds references separated by a single space", () => {
|
||||
it("for repo-numeric references", () => {
|
||||
const f = (number: number) => `sourcecred/example-github#${number}`;
|
||||
const input = `${f(1)} ${f(2)} ${f(3)}`;
|
||||
expect(parseReferences(input)).toEqual([
|
||||
{refType: "BASIC", ref: "sourcecred/example-github#1"},
|
||||
{refType: "BASIC", ref: "sourcecred/example-github#2"},
|
||||
{refType: "BASIC", ref: "sourcecred/example-github#3"},
|
||||
]);
|
||||
});
|
||||
it("for numeric references", () => {
|
||||
const f = (number: number) => `#${number}`;
|
||||
const input = `${f(1)} ${f(2)} ${f(3)}`;
|
||||
expect(parseReferences(input)).toEqual([
|
||||
{refType: "BASIC", ref: "#1"},
|
||||
{refType: "BASIC", ref: "#2"},
|
||||
{refType: "BASIC", ref: "#3"},
|
||||
]);
|
||||
});
|
||||
it("for username references", () => {
|
||||
const f = (number: number) => `@user${number}`;
|
||||
const input = `${f(1)} ${f(2)} ${f(3)}`;
|
||||
expect(parseReferences(input)).toEqual([
|
||||
{refType: "BASIC", ref: "@user1"},
|
||||
{refType: "BASIC", ref: "@user2"},
|
||||
{refType: "BASIC", ref: "@user3"},
|
||||
]);
|
||||
});
|
||||
it("for paired-with references", () => {
|
||||
const f = (number: number) => `Paired-with: @user${number}`;
|
||||
const input = `${f(1)} ${f(2)} ${f(3)}`;
|
||||
expect(parseReferences(input)).toEqual([
|
||||
{refType: "PAIRED_WITH", ref: "@user1"},
|
||||
{refType: "PAIRED_WITH", ref: "@user2"},
|
||||
{refType: "PAIRED_WITH", ref: "@user3"},
|
||||
]);
|
||||
});
|
||||
it("for GitHub URL references", () => {
|
||||
const f = (number: number) =>
|
||||
"https://github.com/sourcecred/example-github/issues/" + number;
|
||||
const input = `${f(1)} ${f(2)} ${f(3)}`;
|
||||
expect(parseReferences(input)).toEqual([
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref: "https://github.com/sourcecred/example-github/issues/1",
|
||||
},
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref: "https://github.com/sourcecred/example-github/issues/2",
|
||||
},
|
||||
{
|
||||
refType: "BASIC",
|
||||
ref: "https://github.com/sourcecred/example-github/issues/3",
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue