diff --git a/src/plugins/github/parseMarkdown.js b/src/plugins/github/parseMarkdown.js index e933717..085ade2 100644 --- a/src/plugins/github/parseMarkdown.js +++ b/src/plugins/github/parseMarkdown.js @@ -1,6 +1,7 @@ // @flow import {Node, Parser} from "commonmark"; +import {OPENTAG, CLOSETAG} from "commonmark/lib/common"; /** * Extract maximal contiguous blocks of text from a Markdown string, in @@ -66,9 +67,35 @@ type NodeType = export function deformat(ast: Node): void { const walker = ast.walker(); + // We ignore the contents of HTML "code" elements and their subtrees. + // This variable tracks how deep we are in such a tree. It is 0 if we + // are not in such a tree, 1 if we are in a "code" element, 2 if we + // are in an element inside a "code" element, etc. + let htmlDepth: number = 0; + const reOpenCodeTag = /^])/i; + const reOpenTag = new RegExp(`^(?:${OPENTAG})`); + const reCloseTag = new RegExp(`^(?:${CLOSETAG})`); + for (let step; (step = walker.next()); ) { const node: Node = step.node; const type: NodeType = node.type; + if (htmlDepth > 0) { + if (type === "html_inline") { + if (reOpenTag.test(node.literal)) { + htmlDepth++; + } else if (reCloseTag.test(node.literal)) { + htmlDepth--; + } + } + // The AST walker gets into a broken state if you unlink a node + // that has children before those children have been visited. We + // only unlink when leaving a node, or when entering a node that + // has no children. + if (!step.entering || node.firstChild == null) { + node.unlink(); + continue; + } + } switch (type) { case "text": break; @@ -94,6 +121,11 @@ export function deformat(ast: Node): void { } break; case "html_inline": + if (reOpenCodeTag.test(node.literal)) { + htmlDepth++; // should have been 0 previously + } + node.unlink(); + break; case "code": case "document": case "paragraph": diff --git a/src/plugins/github/parseMarkdown.test.js b/src/plugins/github/parseMarkdown.test.js index 4487857..a69a6f1 100644 --- a/src/plugins/github/parseMarkdown.test.js +++ b/src/plugins/github/parseMarkdown.test.js @@ -50,6 +50,54 @@ describe("plugins/github/parseMarkdown", () => { ]; expect(textBlocks(input)).toEqual(expected); }); + + it("includes text inside of non-code HTML elements", () => { + const input = "My #1 pal"; + expect(textBlocks(input)).toEqual(["My #1 pal"]); + }); + + it('strips HTML "code" elements', () => { + const input = "My #1 pal"; + expect(textBlocks(input)).toEqual(["My pal"]); + }); + + it('strips subtrees rooted at HTML "code" elements', () => { + const input = "My #1 *and* #2 pals"; + expect(textBlocks(input)).toEqual(["My pals"]); + }); + + it('strips "code" elements within "code" elements', () => { + const input = "see #1 and #2 okay"; + expect(textBlocks(input)).toEqual(["see "]); + }); + + it('handles comments and CDATA within "code" elements', () => { + // These are "html_inline" nodes, but are not HTML elements. They + // may contain closing-tag sequences, but these do not actually + // close a tag. + const input = [ + "note", + "alpha ", + " ", + " echo]]> ", + "foxtrot ", + "well", + ].join(""); + expect(textBlocks(input)).toEqual(["note well"]); + }); + + it('strips HTML "pre" blocks and subtrees', () => { + // "pre" is not handled specially; all blocks are skipped. + const input = + "Hello\n\n
some pre-formatted code
\n\nworld"; + expect(textBlocks(input)).toEqual(["Hello", "world"]); + }); + + it('strips non-"pre" blocks and subtrees', () => { + const input = + "Hello\n\n
some pre-formatted code
\n\nworld"; + expect(textBlocks(input)).toEqual(["Hello", "world"]); + }); }); describe("coalesceText", () => { @@ -134,7 +182,7 @@ describe("plugins/github/parseMarkdown", () => { "", "[1]: https://example.com/", "", - "Here's a list: ", + "Here's a list:", " - **important** things", " - *also **important*** stuff", " - a*b*c versus `a*b*c`", @@ -157,7 +205,7 @@ describe("plugins/github/parseMarkdown", () => { "much wow", "```", "", - "Here's a list: ", + "Here's a list:", " - important things", " - also important stuff", " - abc versus `a*b*c`", diff --git a/src/plugins/github/parseReferences.test.js b/src/plugins/github/parseReferences.test.js index f2bb407..9ba6058 100644 --- a/src/plugins/github/parseReferences.test.js +++ b/src/plugins/github/parseReferences.test.js @@ -38,6 +38,16 @@ describe("plugins/github/parseReferences", () => { expect(parseReferences(input)).toHaveLength(0); }); + it("finds references in normal HTML elements", () => { + const input = "see #1, #2, and #3 for context"; + expect(parseReferences(input)).toHaveLength(3); + }); + + it('does not find references in HTML "code" elements', () => { + const input = "see #1, #2, and #3 for context"; + expect(parseReferences(input)).toHaveLength(0); + }); + it("does not find references in inline code with lots of backticks", () => { // An attempt to evade inline code with regular expressions might // well fail here, because an even number of backticks appears on