Construct AST WIP: add block-level nodes
Build a partial tree by adding block nodes. The block nodes will then be traversed and inline markdown parsed inside each of them. Tests are broken at this point until the full tree is constructed.
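As a rough illustration of the two-phase approach described above, here is a standalone sketch: a first pass appends block-level nodes to a tree while stashing their raw text, and a second pass walks the finished tree and parses inline markdown inside each block. The types and function names below are simplified stand-ins for illustration, not the parser code added in this commit.

    package main

    import "fmt"

    // Block is a minimal stand-in for the commit's *Node, which carries a
    // NodeType, a raw content buffer, and parent/child links.
    type Block struct {
        Kind     string
        content  string // raw text captured during the block pass
        Children []*Block
    }

    // Phase 1: build the block-level tree, deferring inline parsing.
    func parseBlocks(doc *Block, lines []string) {
        for _, line := range lines {
            doc.Children = append(doc.Children, &Block{Kind: "Paragraph", content: line})
        }
    }

    // Phase 2: traverse the tree and parse inline markdown inside each block.
    func parseInlines(n *Block) {
        if n.content != "" {
            // the real parser would run its inline pass on n.content here
            fmt.Printf("inline-parse %s: %q\n", n.Kind, n.content)
        }
        for _, c := range n.Children {
            parseInlines(c)
        }
    }

    func main() {
        doc := &Block{Kind: "Document"}
        parseBlocks(doc, []string{"first *paragraph*", "second `paragraph`"})
        parseInlines(doc)
    }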
parent 94893247d1
commit 7c95b7a189
block.go: 293 changed lines
@@ -15,10 +15,23 @@ package blackfriday
import (
    "bytes"
    "html"
    "regexp"

    "github.com/shurcooL/sanitized_anchor_name"
)

const (
    Entity    = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
    Escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
)

var (
    reBackslashOrAmp      = regexp.MustCompile("[\\&]")
    reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + Escapable + "|" + Entity)
    reTrailingWhitespace  = regexp.MustCompile("(\n *)+$")
)

// Parse block-level data.
// Note: this function and many that it calls assume that
// the input buffer ends with a newline.
@@ -116,7 +129,7 @@ func (p *parser) block(data []byte) {
    // or
    // ______
    if p.isHRule(data) {
        p.r.HRule()
        p.addBlock(HorizontalRule, nil)
        var i int
        for i = 0; data[i] != '\n'; i++ {
        }
@@ -189,6 +202,13 @@ func (p *parser) block(data []byte) {
    p.nesting--
}

func (p *parser) addBlock(typ NodeType, content []byte) *Node {
    p.closeUnmatchedBlocks()
    container := p.addChild(typ, 0)
    container.content = content
    return container
}

func (p *parser) isPrefixHeader(data []byte) bool {
    if data[0] != '#' {
        return false
@@ -245,11 +265,9 @@ func (p *parser) prefixHeader(data []byte) int {
        if id == "" && p.flags&AutoHeaderIDs != 0 {
            id = sanitized_anchor_name.Create(string(data[i:end]))
        }
        p.r.BeginHeader(level, id)
        header := p.r.CopyWrites(func() {
            p.inline(data[i:end])
        })
        p.r.EndHeader(level, id, header)
        block := p.addBlock(Header, data[i:end])
        block.HeaderID = id
        block.Level = uint32(level)
    }
    return skip
}
@@ -294,9 +312,14 @@ func (p *parser) titleBlock(data []byte, doRender bool) int {
    }

    data = bytes.Join(splitData[0:i], []byte("\n"))
    p.r.TitleBlock(data)
    consumed := len(data)
    data = bytes.TrimPrefix(data, []byte("% "))
    data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
    block := p.addBlock(Header, data)
    block.Level = 1
    block.IsTitleblock = true

    return len(data)
    return consumed
}

func (p *parser) html(data []byte, doRender bool) int {
@@ -391,12 +414,17 @@ func (p *parser) html(data []byte, doRender bool) int {
        for end > 0 && data[end-1] == '\n' {
            end--
        }
        p.r.BlockHtml(data[:end])
        finalizeHtmlBlock(p.addBlock(HtmlBlock, data[:end]))
    }

    return i
}

func finalizeHtmlBlock(block *Node) {
    block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{})
    block.content = []byte{}
}

// HTML comment, lax form
func (p *parser) htmlComment(data []byte, doRender bool) int {
    i := p.inlineHtmlComment(data)
@@ -409,7 +437,8 @@ func (p *parser) htmlComment(data []byte, doRender bool) int {
        for end > 0 && data[end-1] == '\n' {
            end--
        }
        p.r.BlockHtml(data[:end])
        block := p.addBlock(HtmlBlock, data[:end])
        finalizeHtmlBlock(block)
    }
    return size
}
@@ -441,7 +470,7 @@ func (p *parser) htmlHr(data []byte, doRender bool) int {
        for end > 0 && data[end-1] == '\n' {
            end--
        }
        p.r.BlockHtml(data[:end])
        finalizeHtmlBlock(p.addBlock(HtmlBlock, data[:end]))
    }
    return size
}
@@ -464,7 +493,9 @@ func (p *parser) htmlFindTag(data []byte) (string, bool) {

func (p *parser) htmlFindEnd(tag string, data []byte) int {
    // assume data[0] == '<' && data[1] == '/' already tested

    if tag == "hr" {
        return 2
    }
    // check if tag is a match
    closetag := []byte("</" + tag + ">")
    if !bytes.HasPrefix(data, closetag) {
@@ -642,6 +673,10 @@ func (p *parser) fencedCode(data []byte, doRender bool) int {
    }

    var work bytes.Buffer
    if lang != nil {
        work.Write([]byte(*lang))
        work.WriteByte('\n')
    }

    for {
        // safe to assume beg < len(data)
@@ -668,48 +703,76 @@ func (p *parser) fencedCode(data []byte, doRender bool) int {
        beg = end
    }

    syntax := ""
    if lang != nil {
        syntax = *lang
    }
    //syntax := ""
    //if lang != nil {
    //	syntax = *lang
    //}

    if doRender {
        p.r.BlockCode(work.Bytes(), syntax)
        block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
        block.IsFenced = true
        finalizeCodeBlock(block)
    }

    return beg
}

func unescapeChar(str []byte) []byte {
    if str[0] == '\\' {
        return []byte{str[1]}
    }
    return []byte(html.UnescapeString(string(str)))
}

func unescapeString(str []byte) []byte {
    if reBackslashOrAmp.Match(str) {
        return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
    } else {
        return str
    }
}

func finalizeCodeBlock(block *Node) {
    if block.IsFenced {
        newlinePos := bytes.IndexByte(block.content, '\n')
        firstLine := block.content[:newlinePos]
        rest := block.content[newlinePos+1:]
        block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
        block.Literal = rest
    } else {
        block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{'\n'})
    }
    block.content = nil
}

func (p *parser) table(data []byte) int {
    var header bytes.Buffer
    i, columns := p.tableHeader(&header, data)
    table := p.addBlock(Table, nil)
    i, columns := p.tableHeader(data)
    if i == 0 {
        p.tip = table.Parent
        table.unlink()
        return 0
    }

    var body bytes.Buffer
    p.addBlock(TableBody, nil)

    body.Write(p.r.CaptureWrites(func() {
        for i < len(data) {
            pipes, rowStart := 0, i
            for ; data[i] != '\n'; i++ {
                if data[i] == '|' {
                    pipes++
                }
    for i < len(data) {
        pipes, rowStart := 0, i
        for ; data[i] != '\n'; i++ {
            if data[i] == '|' {
                pipes++
            }

            if pipes == 0 {
                i = rowStart
                break
            }

            // include the newline in data sent to tableRow
            i++
            p.tableRow(data[rowStart:i], columns, false)
        }
    }))

    p.r.Table(header.Bytes(), body.Bytes(), columns)
        if pipes == 0 {
            i = rowStart
            break
        }

        // include the newline in data sent to tableRow
        i++
        p.tableRow(data[rowStart:i], columns, false)
    }

    return i
}
@@ -723,7 +786,7 @@ func isBackslashEscaped(data []byte, i int) bool {
    return backslashes&1 == 1
}

func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
func (p *parser) tableHeader(data []byte) (size int, columns []int) {
    i := 0
    colCount := 1
    for i = 0; data[i] != '\n'; i++ {
@@ -821,16 +884,15 @@ func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
        return
    }

    out.Write(p.r.CaptureWrites(func() {
        p.tableRow(header, columns, true)
    }))
    p.addBlock(TableHead, nil)
    p.tableRow(header, columns, true)
    size = i + 1
    return
}

func (p *parser) tableRow(data []byte, columns []int, header bool) {
    p.addBlock(TableRow, nil)
    i, col := 0, 0
    var rowWork bytes.Buffer

    if data[i] == '|' && !isBackslashEscaped(data, i) {
        i++
@@ -856,29 +918,19 @@ func (p *parser) tableRow(data []byte, columns []int, header bool) {
            cellEnd--
        }

        cellWork := p.r.CaptureWrites(func() {
            p.inline(data[cellStart:cellEnd])
        })

        if header {
            p.r.TableHeaderCell(&rowWork, cellWork, columns[col])
        } else {
            p.r.TableCell(&rowWork, cellWork, columns[col])
        }
        cell := p.addBlock(TableCell, data[cellStart:cellEnd])
        cell.IsHeader = header
        cell.Align = columns[col]
    }

    // pad it out with empty columns to get the right number
    for ; col < len(columns); col++ {
        if header {
            p.r.TableHeaderCell(&rowWork, nil, columns[col])
        } else {
            p.r.TableCell(&rowWork, nil, columns[col])
        }
        cell := p.addBlock(TableCell, nil)
        cell.IsHeader = header
        cell.Align = columns[col]
    }

    // silently ignore rows with too many cells

    p.r.TableRow(rowWork.Bytes())
}

// returns blockquote prefix length
@@ -910,6 +962,7 @@ func (p *parser) terminateBlockquote(data []byte, beg, end int) bool {

// parse a blockquote fragment
func (p *parser) quote(data []byte) int {
    block := p.addBlock(BlockQuote, nil)
    var raw bytes.Buffer
    beg, end := 0, 0
    for beg < len(data) {
@@ -928,22 +981,18 @@ func (p *parser) quote(data []byte) int {
            end++
        }
        end++

        if pre := p.quotePrefix(data[beg:]); pre > 0 {
            // skip the prefix
            beg += pre
        } else if p.terminateBlockquote(data, beg, end) {
            break
        }

        // this line is part of the blockquote
        raw.Write(data[beg:end])
        beg = end
    }

    p.r.BlockQuote(p.r.CaptureWrites(func() {
        p.block(raw.Bytes())
    }))
    p.block(raw.Bytes())
    p.finalize(block)
    return end
}
@@ -995,7 +1044,9 @@ func (p *parser) code(data []byte) int {

    work.WriteByte('\n')

    p.r.BlockCode(work.Bytes(), "")
    block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
    block.IsFenced = false
    finalizeCodeBlock(block)

    return i
}
@@ -1057,10 +1108,19 @@ func (p *parser) dliPrefix(data []byte) int {
func (p *parser) list(data []byte, flags ListType) int {
    i := 0
    flags |= ListItemBeginningOfList
    p.r.BeginList(flags)
    block := p.addBlock(List, nil)
    block.ListData = &ListData{ // TODO: fill in the real ListData
        Flags:      flags,
        Tight:      true,
        BulletChar: '*',
        Delimiter:  0,
    }

    for i < len(data) {
        skip := p.listItem(data[i:], &flags)
        if flags&ListItemContainsBlock != 0 {
            block.ListData.Tight = false
        }
        i += skip
        if skip == 0 || flags&ListItemEndOfList != 0 {
            break
@@ -1068,10 +1128,53 @@ func (p *parser) list(data []byte, flags ListType) int {
        flags &= ^ListItemBeginningOfList
    }

    p.r.EndList(flags)
    above := block.Parent
    finalizeList(block)
    p.tip = above
    return i
}

// Returns true if block ends with a blank line, descending if needed
// into lists and sublists.
func endsWithBlankLine(block *Node) bool {
    // TODO: figure this out. Always false now.
    for block != nil {
        //if block.lastLineBlank {
        //return true
        //}
        t := block.Type
        if t == List || t == Item {
            block = block.LastChild
        } else {
            break
        }
    }
    return false
}

func finalizeList(block *Node) {
    block.open = false
    item := block.FirstChild
    for item != nil {
        // check for non-final list item ending with blank line:
        if endsWithBlankLine(item) && item.Next != nil {
            block.ListData.Tight = false
            break
        }
        // recurse into children of list item, to see if there are spaces
        // between any of them:
        subItem := item.FirstChild
        for subItem != nil {
            if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
                block.ListData.Tight = false
                break
            }
            subItem = subItem.Next
        }
        item = item.Next
    }
}

// Parse a single list item.
// Assumes initial prefix is already removed if this is a sublist.
func (p *parser) listItem(data []byte, flags *ListType) int {
@@ -1223,44 +1326,34 @@ gatherlines:

    rawBytes := raw.Bytes()

    block := p.addBlock(Item, nil)
    block.ListData = &ListData{ // TODO: fill in the real ListData
        Flags:      *flags,
        Tight:      false,
        BulletChar: '*',
        Delimiter:  0,
    }

    // render the contents of the list item
    var cooked bytes.Buffer
    if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
        // intermediate render of block item, except for definition term
        if sublist > 0 {
            cooked.Write(p.r.CaptureWrites(func() {
                p.block(rawBytes[:sublist])
                p.block(rawBytes[sublist:])
            }))
            p.block(rawBytes[:sublist])
            p.block(rawBytes[sublist:])
        } else {
            cooked.Write(p.r.CaptureWrites(func() {
                p.block(rawBytes)
            }))
            p.block(rawBytes)
        }
    } else {
        // intermediate render of inline item
        if sublist > 0 {
            cooked.Write(p.r.CaptureWrites(func() {
                p.inline(rawBytes[:sublist])
                p.block(rawBytes[sublist:])
            }))
            child := p.addChild(Paragraph, 0)
            child.content = rawBytes[:sublist]
            p.block(rawBytes[sublist:])
        } else {
            cooked.Write(p.r.CaptureWrites(func() {
                p.inline(rawBytes)
            }))
            child := p.addChild(Paragraph, 0)
            child.content = rawBytes
        }
    }

    // render the actual list item
    cookedBytes := cooked.Bytes()
    parsedEnd := len(cookedBytes)

    // strip trailing newlines
    for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
        parsedEnd--
    }
    p.r.ListItem(cookedBytes[:parsedEnd], *flags)

    return line
}
@@ -1284,9 +1377,7 @@ func (p *parser) renderParagraph(data []byte) {
        end--
    }

    p.r.BeginParagraph()
    p.inline(data[beg:end])
    p.r.EndParagraph()
    p.addBlock(Paragraph, data[beg:end])
}

func (p *parser) paragraph(data []byte) int {
@@ -1335,11 +1426,9 @@ func (p *parser) paragraph(data []byte) int {
            id = sanitized_anchor_name.Create(string(data[prev:eol]))
        }

        p.r.BeginHeader(level, id)
        header := p.r.CopyWrites(func() {
            p.inline(data[prev:eol])
        })
        p.r.EndHeader(level, id, header)
        block := p.addBlock(Header, data[prev:eol])
        block.Level = uint32(level)
        block.HeaderID = id

        // find the end of the underline
        for data[i] != '\n' {
markdown.go: 41 changed lines
@@ -228,6 +228,12 @@ type parser struct {
    // presence. If a ref is also a footnote, it's stored both in refs and here
    // in notes. Slice is nil if footnotes not enabled.
    notes []*reference

    doc                  *Node
    tip                  *Node // = doc
    oldTip               *Node
    lastMatchedContainer *Node // = doc
    allClosed            bool
}

func (p *parser) getRef(refid string) (ref *reference, found bool) {
@@ -250,6 +256,34 @@ func (p *parser) getRef(refid string) (ref *reference, found bool) {
    return ref, found
}

func (p *parser) finalize(block *Node) {
    above := block.Parent
    block.open = false
    p.tip = above
}

func (p *parser) addChild(node NodeType, offset uint32) *Node {
    for !p.tip.canContain(node) {
        p.finalize(p.tip)
    }
    newNode := NewNode(node)
    newNode.content = []byte{}
    p.tip.appendChild(newNode)
    p.tip = newNode
    return newNode
}

func (p *parser) closeUnmatchedBlocks() {
    if !p.allClosed {
        for p.oldTip != p.lastMatchedContainer {
            parent := p.oldTip.Parent
            p.finalize(p.oldTip)
            p.oldTip = parent
        }
        p.allClosed = true
    }
}

//
//
// Public interface
@@ -366,6 +400,13 @@ func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
    p.maxNesting = 16
    p.insideLink = false

    docNode := NewNode(Document)
    p.doc = docNode
    p.tip = docNode
    p.oldTip = docNode
    p.lastMatchedContainer = docNode
    p.allClosed = true

    // register inline parsers
    p.inlineCallback['*'] = emphasis
    p.inlineCallback['_'] = emphasis
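For orientation, below is a standalone sketch of the open-block bookkeeping that the markdown.go hunks introduce: the parser keeps a tip pointing at the deepest open block, addChild closes any open blocks that cannot contain the new node, and finalize moves the tip back up to the parent. The types and the canContain rule here are simplified stand-ins chosen for illustration, not the library's real definitions.

    package main

    import "fmt"

    type NodeType int

    const (
        Document NodeType = iota
        BlockQuote
        Paragraph
    )

    type Node struct {
        Type     NodeType
        Parent   *Node
        children []*Node
        open     bool
    }

    // Simplified containment rule: only Document and BlockQuote hold children.
    func (n *Node) canContain(t NodeType) bool {
        return n.Type == Document || n.Type == BlockQuote
    }

    type parser struct {
        doc *Node
        tip *Node // deepest open block
    }

    // finalize closes a block and moves the tip back to its parent.
    func (p *parser) finalize(block *Node) {
        block.open = false
        p.tip = block.Parent
    }

    // addChild closes open blocks that cannot contain the new node,
    // then appends the node and makes it the new tip.
    func (p *parser) addChild(t NodeType) *Node {
        for !p.tip.canContain(t) {
            p.finalize(p.tip)
        }
        n := &Node{Type: t, Parent: p.tip, open: true}
        p.tip.children = append(p.tip.children, n)
        p.tip = n
        return n
    }

    func main() {
        doc := &Node{Type: Document, open: true}
        p := &parser{doc: doc, tip: doc}
        p.addChild(BlockQuote)
        p.addChild(Paragraph)
        p.addChild(Paragraph) // closes the previous paragraph first
        fmt.Println("children of blockquote:", len(doc.children[0].children))
    }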