mirror of
https://github.com/status-im/sourcecred.git
synced 2025-02-05 01:04:53 +00:00
Parse GitHub references from Markdown text nodes (#480)
Summary: See #432. This commit switches the GitHub Markdown parser from matching regular expressions against the raw post body to matching the same regular expressions against the semantic text of a Markdown document. See test cases for `parseReferences` and `parseMarkdown` for more details. There are no changes to snapshots or cached GitHub data because the Markdown in the example repository is simple enough to be properly parsed by a simple approach that treats everything as literal text. The change to the “finds numeric references in a multiline string” test case is to strip leading indentation from the string in question. As previously written, the test had four spaces of leading indentation on each line, causing the Markdown parser to interpret it as a code block, in turn causing our logic to (correctly) omit it entirely. The revised version of the test has no leading indentation. Test Plan: Light unit tests included; these tests are intended to verify that the actual Markdown logic from `parseMarkdown` is being used, and that file has more extensive tests. Additionally, `yarn travis --full` passes. wchargin-branch: github-parse-markdown
This commit is contained in:
parent
a9600d0379
commit
56d48e254c
@ -1,48 +1,61 @@
|
||||
// @flow
|
||||
|
||||
import {textBlocks} from "./parseMarkdown";
|
||||
|
||||
export type ParsedReference = {|
|
||||
// "@user" or "#123" or "https://github.com/owner/name/..."
|
||||
+ref: string,
|
||||
+refType: "BASIC" | "PAIRED_WITH",
|
||||
|};
|
||||
|
||||
/**
|
||||
* Parse GitHub references from a Markdown document, such as an issue or
|
||||
* comment body. This will include references that span multiple lines
|
||||
* (across softbreaks), and exclude references that occur within code
|
||||
* blocks.
|
||||
*/
|
||||
export function parseReferences(body: string): ParsedReference[] {
|
||||
// Note to maintainer: If it becomes necessary to encode references in a
|
||||
// richer format, consider implementing the type signature described in
|
||||
// https://github.com/sourcecred/sourcecred/pull/130#pullrequestreview-113849998
|
||||
const blocks = textBlocks(body);
|
||||
return [].concat.apply([], blocks.map(parseReferencesFromRawString));
|
||||
}
|
||||
|
||||
function parseReferencesFromRawString(textBlock: string): ParsedReference[] {
|
||||
return [
|
||||
...findNumericReferences(body),
|
||||
...findRepoNumericReferences(body),
|
||||
...findGithubUrlReferences(body),
|
||||
...findUsernameReferences(body),
|
||||
...findNumericReferences(textBlock),
|
||||
...findRepoNumericReferences(textBlock),
|
||||
...findGithubUrlReferences(textBlock),
|
||||
...findUsernameReferences(textBlock),
|
||||
];
|
||||
}
|
||||
|
||||
function findRepoNumericReferences(body: string): ParsedReference[] {
|
||||
function findRepoNumericReferences(textBlock: string): ParsedReference[] {
|
||||
return findAllMatches(
|
||||
/(?:\W|^)([a-zA-Z0-9-]+\/[a-zA-Z0-9-]+#\d+)(?=\W|$)/gm,
|
||||
body
|
||||
textBlock
|
||||
).map((x) => ({
|
||||
refType: "BASIC",
|
||||
ref: x[1],
|
||||
}));
|
||||
}
|
||||
|
||||
function findNumericReferences(body: string): ParsedReference[] {
|
||||
return findAllMatches(/(?:\W|^)(#\d+)(?=\W|$)/gm, body).map((x) => ({
|
||||
function findNumericReferences(textBlock: string): ParsedReference[] {
|
||||
return findAllMatches(/(?:\W|^)(#\d+)(?=\W|$)/gm, textBlock).map((x) => ({
|
||||
refType: "BASIC",
|
||||
ref: x[1],
|
||||
}));
|
||||
}
|
||||
|
||||
function findUsernameReferences(body: string): ParsedReference[] {
|
||||
function findUsernameReferences(textBlock: string): ParsedReference[] {
|
||||
const pairedWithRefs = findAllMatches(
|
||||
/(?:\W|^)(?:P|p)aired(?:-| )(?:w|W)ith:? (@[a-zA-Z0-9-]+)(?=\W|$)/gm,
|
||||
body
|
||||
textBlock
|
||||
).map((x) => ({ref: x[1], refType: "PAIRED_WITH"}));
|
||||
const basicRefs = findAllMatches(
|
||||
/(?:\W|^)(@[a-zA-Z0-9-]+)(?=\W|$)/gm,
|
||||
body
|
||||
textBlock
|
||||
).map((x) => ({ref: x[1], refType: "BASIC"}));
|
||||
for (const {ref} of pairedWithRefs) {
|
||||
const basicRefIndexToRemove = basicRefs.findIndex((x) => x.ref === ref);
|
||||
@ -54,7 +67,7 @@ function findUsernameReferences(body: string): ParsedReference[] {
|
||||
return [...pairedWithRefs, ...basicRefs];
|
||||
}
|
||||
|
||||
function findGithubUrlReferences(body: string): ParsedReference[] {
|
||||
function findGithubUrlReferences(textBlock: string): ParsedReference[] {
|
||||
const githubNamePart = /(?:[a-zA-Z0-9_-]+)/.source;
|
||||
const urlRegex = new RegExp(
|
||||
"" +
|
||||
@ -74,7 +87,7 @@ function findGithubUrlReferences(body: string): ParsedReference[] {
|
||||
/(?=[^\w/]|$)/.source,
|
||||
"gm"
|
||||
);
|
||||
return findAllMatches(urlRegex, body).map((match) => ({
|
||||
return findAllMatches(urlRegex, textBlock).map((match) => ({
|
||||
refType: "BASIC",
|
||||
ref: match[1],
|
||||
}));
|
||||
|
@ -17,10 +17,11 @@ describe("parseReferences", () => {
|
||||
});
|
||||
|
||||
it("finds numeric references in a multiline string", () => {
|
||||
const example = `
|
||||
This is a multiline string.
|
||||
It refers to #1. Oh, and to #2 too.
|
||||
(#42 might be included too - who knows?)`;
|
||||
const example = [
|
||||
"This is a multiline string.",
|
||||
"It refers to #1. Oh, and to #2 too.",
|
||||
"(#42 might be included too - who knows?)",
|
||||
].join("\n");
|
||||
expect(parseReferences(example)).toEqual([
|
||||
{refType: "BASIC", ref: "#1"},
|
||||
{refType: "BASIC", ref: "#2"},
|
||||
@ -32,6 +33,37 @@ describe("parseReferences", () => {
|
||||
expect(parseReferences("foo#123 #124bar")).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("does not find references in inline code", () => {
|
||||
const input = "text like `#1` means issue";
|
||||
expect(parseReferences(input)).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("does not find references in inline code with lots of backticks", () => {
|
||||
// An attempt to evade inline code with regular expressions might
|
||||
// well fail here, because an even number of backticks appears on
|
||||
// each side of the would-be reference.
|
||||
const input = "text like ````#1```` means issue";
|
||||
expect(parseReferences(input)).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("does not find references in indented-block code", () => {
|
||||
const input = "text like\n\n #1\n\nmeans issue";
|
||||
expect(parseReferences(input)).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("does not find references in fenced-block code", () => {
|
||||
const input = "text like\n\n```\n#1\n```\n\nmeans issue";
|
||||
expect(parseReferences(input)).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("finds references with mixed formatting", () => {
|
||||
const input = "*Paired* with @alphonse, but *reviewed* by @betty.";
|
||||
expect(parseReferences(input)).toEqual([
|
||||
{refType: "PAIRED_WITH", ref: "@alphonse"},
|
||||
{refType: "BASIC", ref: "@betty"},
|
||||
]);
|
||||
});
|
||||
|
||||
describe("cross-repo links", () => {
|
||||
const repoRef = "sourcecred/sourcecred#12";
|
||||
it("a bare link", () => {
|
||||
|
Loading…
x
Reference in New Issue
Block a user