Properly detect space-separated GitHub references (#478)

Summary:
The regular expressions used to detect GitHub references were of the
form `/(?:\W|^)(things)(?:\W|$)/gm`, where the outer non-capturing
groups were intended to enforce a word boundary constraint. However,
this caused reference detection in strings like `"#1 #2"` to fail,
because the putative matches would be `"#1 "` and `" #2"`, but these
matches overlap, and the JavaScript RegExp API (like most such APIs)
finds only non-overlapping matches. Therefore, in a string of
space-separated references of the same kind, only every other reference
would be detected.

A solution is to use a positive lookahead instead of the second
non-capturing group: i.e., `/(?:\W|$)/` becomes `/(?=\W|$)/`. (Ideally,
the first non-capturing group would just be a lookbehind, but JavaScript
doesn’t support those.)

In some cases, using `\b` is a simpler solution. But this does not work
in all cases: for instance, it works for repo-numeric references, but
does not work for numeric references, because the presence of the hash
means that there cannot be an immediately preceding word boundary. For
consistency, I opted to use the lookahead solution in all cases, but any
solution that passes tests is okay with me.

Test Plan:
Regression tests added. They fail before this patch and pass with it.

wchargin-branch: fix-space-separated-github-references
This commit is contained in:
William Chargin 2018-07-03 11:32:13 -07:00 committed by GitHub
parent 6ab78b85da
commit bc9e94b2a1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 63 additions and 5 deletions

View File

@ -20,7 +20,7 @@ export function parseReferences(body: string): ParsedReference[] {
function findRepoNumericReferences(body: string): ParsedReference[] { function findRepoNumericReferences(body: string): ParsedReference[] {
return findAllMatches( return findAllMatches(
/(?:\W|^)([a-zA-Z0-9-]+\/[a-zA-Z0-9-]+#\d+)(?:\W|$)/gm, /(?:\W|^)([a-zA-Z0-9-]+\/[a-zA-Z0-9-]+#\d+)(?=\W|$)/gm,
body body
).map((x) => ({ ).map((x) => ({
refType: "BASIC", refType: "BASIC",
@ -29,7 +29,7 @@ function findRepoNumericReferences(body: string): ParsedReference[] {
} }
function findNumericReferences(body: string): ParsedReference[] { function findNumericReferences(body: string): ParsedReference[] {
return findAllMatches(/(?:\W|^)(#\d+)(?:\W|$)/gm, body).map((x) => ({ return findAllMatches(/(?:\W|^)(#\d+)(?=\W|$)/gm, body).map((x) => ({
refType: "BASIC", refType: "BASIC",
ref: x[1], ref: x[1],
})); }));
@ -37,11 +37,11 @@ function findNumericReferences(body: string): ParsedReference[] {
function findUsernameReferences(body: string): ParsedReference[] { function findUsernameReferences(body: string): ParsedReference[] {
const pairedWithRefs = findAllMatches( const pairedWithRefs = findAllMatches(
/(?:\W|^)(?:P|p)aired(?:-| )(?:w|W)ith:? (@[a-zA-Z0-9-]+)(?:\W|$)/gm, /(?:\W|^)(?:P|p)aired(?:-| )(?:w|W)ith:? (@[a-zA-Z0-9-]+)(?=\W|$)/gm,
body body
).map((x) => ({ref: x[1], refType: "PAIRED_WITH"})); ).map((x) => ({ref: x[1], refType: "PAIRED_WITH"}));
const basicRefs = findAllMatches( const basicRefs = findAllMatches(
/(?:\W|^)(@[a-zA-Z0-9-]+)(?:\W|$)/gm, /(?:\W|^)(@[a-zA-Z0-9-]+)(?=\W|$)/gm,
body body
).map((x) => ({ref: x[1], refType: "BASIC"})); ).map((x) => ({ref: x[1], refType: "BASIC"}));
for (const {ref} of pairedWithRefs) { for (const {ref} of pairedWithRefs) {
@ -71,7 +71,7 @@ function findGithubUrlReferences(body: string): ParsedReference[] {
.source + .source +
")?" + ")?" +
")" + ")" +
/(?:[^\w/]|$)/.source, /(?=[^\w/]|$)/.source,
"gm" "gm"
); );
return findAllMatches(urlRegex, body).map((match) => ({ return findAllMatches(urlRegex, body).map((match) => ({

View File

@ -302,4 +302,62 @@ https://github.com/sourcecred/example-github/pull/3#issuecomment-369162222
]); ]);
}); });
}); });
describe("finds references separated by a single space", () => {
it("for repo-numeric references", () => {
const f = (number: number) => `sourcecred/example-github#${number}`;
const input = `${f(1)} ${f(2)} ${f(3)}`;
expect(parseReferences(input)).toEqual([
{refType: "BASIC", ref: "sourcecred/example-github#1"},
{refType: "BASIC", ref: "sourcecred/example-github#2"},
{refType: "BASIC", ref: "sourcecred/example-github#3"},
]);
});
it("for numeric references", () => {
const f = (number: number) => `#${number}`;
const input = `${f(1)} ${f(2)} ${f(3)}`;
expect(parseReferences(input)).toEqual([
{refType: "BASIC", ref: "#1"},
{refType: "BASIC", ref: "#2"},
{refType: "BASIC", ref: "#3"},
]);
});
it("for username references", () => {
const f = (number: number) => `@user${number}`;
const input = `${f(1)} ${f(2)} ${f(3)}`;
expect(parseReferences(input)).toEqual([
{refType: "BASIC", ref: "@user1"},
{refType: "BASIC", ref: "@user2"},
{refType: "BASIC", ref: "@user3"},
]);
});
it("for paired-with references", () => {
const f = (number: number) => `Paired-with: @user${number}`;
const input = `${f(1)} ${f(2)} ${f(3)}`;
expect(parseReferences(input)).toEqual([
{refType: "PAIRED_WITH", ref: "@user1"},
{refType: "PAIRED_WITH", ref: "@user2"},
{refType: "PAIRED_WITH", ref: "@user3"},
]);
});
it("for GitHub URL references", () => {
const f = (number: number) =>
"https://github.com/sourcecred/example-github/issues/" + number;
const input = `${f(1)} ${f(2)} ${f(3)}`;
expect(parseReferences(input)).toEqual([
{
refType: "BASIC",
ref: "https://github.com/sourcecred/example-github/issues/1",
},
{
refType: "BASIC",
ref: "https://github.com/sourcecred/example-github/issues/2",
},
{
refType: "BASIC",
ref: "https://github.com/sourcecred/example-github/issues/3",
},
]);
});
});
}); });