Construct AST WIP: add block-level nodes

Build a partial tree by adding block nodes. The block nodes will then be traversed and inline markdown parsed inside each of them. Tests are broken at this point until the full tree is constructed.
2016-03-30 12:57:12 +03:00 · 2016-03-30 12:57:12 +03:00 · 7c95b7a189
parent 94893247d1
commit 7c95b7a189
2 changed files with 232 additions and 102 deletions
--- a/block.go
+++ b/block.go
@ -15,10 +15,23 @@ package blackfriday
 import (
 	"bytes"
 	"html"
 	"regexp"
 	"github.com/shurcooL/sanitized_anchor_name"
 )
 const (
 	// Entity is a regular-expression fragment matching an HTML character
 	// reference: hexadecimal numeric (&#x...;), decimal numeric (&#...;) or
 	// named (&name;).
 	Entity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
 	// Escapable is a regular-expression character class matching a single
 	// punctuation character that may follow a backslash in an escape.
 	Escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
 )
 var (
 	// reBackslashOrAmp is a cheap pre-check that reports whether a string
 	// contains a backslash or an ampersand. The backslash must be doubled at
 	// both the Go-string and the regexp level: "[\\&]" would compile to the
 	// class [\&], which matches '&' only.
 	reBackslashOrAmp = regexp.MustCompile("[\\\\&]")
 	// reEntityOrEscapedChar matches either a backslash followed by an
 	// Escapable character or an Entity, case-insensitively.
 	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + Escapable + "|" + Entity)
 	// reTrailingWhitespace matches one or more trailing runs of a newline
 	// followed by any number of spaces, anchored at the end of the text.
 	reTrailingWhitespace = regexp.MustCompile("(\n *)+$")
 )
 // Parse block-level data.
 // Note: this function and many that it calls assume that
 // the input buffer ends with a newline.
@ -116,7 +129,7 @@ func (p *parser) block(data []byte) {
 		// or
 		// ______
 		if p.isHRule(data) {
-			p.r.HRule()
+			p.addBlock(HorizontalRule, nil)
 			var i int
 			for i = 0; data[i] != '\n'; i++ {
 			}
@ -189,6 +202,13 @@ func (p *parser) block(data []byte) {
 	p.nesting--
 }
 // addBlock closes any unmatched blocks, appends a new node of the given type
 // via addChild, stores content on it as its raw content and returns the node.
 func (p *parser) addBlock(typ NodeType, content []byte) *Node {
 	p.closeUnmatchedBlocks()
 	node := p.addChild(typ, 0)
 	node.content = content
 	return node
 }
 func (p *parser) isPrefixHeader(data []byte) bool {
 	if data[0] != '#' {
 		return false
@ -245,11 +265,9 @@ func (p *parser) prefixHeader(data []byte) int {
 		if id == "" && p.flags&AutoHeaderIDs != 0 {
 			id = sanitized_anchor_name.Create(string(data[i:end]))
 		}
-		p.r.BeginHeader(level, id)
+		block := p.addBlock(Header, data[i:end])
-		header := p.r.CopyWrites(func() {
+		block.HeaderID = id
-			p.inline(data[i:end])
+		block.Level = uint32(level)
 		})
 		p.r.EndHeader(level, id, header)
 	}
 	return skip
 }
@ -294,9 +312,14 @@ func (p *parser) titleBlock(data []byte, doRender bool) int {
 	}
 	data = bytes.Join(splitData[0:i], []byte("\n"))
-	p.r.TitleBlock(data)
+	consumed := len(data)
 	data = bytes.TrimPrefix(data, []byte("% "))
 	data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
 	block := p.addBlock(Header, data)
 	block.Level = 1
 	block.IsTitleblock = true
-	return len(data)
+	return consumed
 }
 func (p *parser) html(data []byte, doRender bool) int {
@ -391,12 +414,17 @@ func (p *parser) html(data []byte, doRender bool) int {
 		for end > 0 && data[end-1] == '\n' {
 			end--
 		}
-		p.r.BlockHtml(data[:end])
+		finalizeHtmlBlock(p.addBlock(HtmlBlock, data[:end]))
 	}
 	return i
 }
 // finalizeHtmlBlock stores the block's raw content, with trailing
 // newline/space runs removed, as its Literal and resets content to an
 // empty slice.
 func finalizeHtmlBlock(block *Node) {
 	trimmed := reTrailingWhitespace.ReplaceAll(block.content, []byte{})
 	block.Literal, block.content = trimmed, []byte{}
 }
 // HTML comment, lax form
 func (p *parser) htmlComment(data []byte, doRender bool) int {
 	i := p.inlineHtmlComment(data)
@ -409,7 +437,8 @@ func (p *parser) htmlComment(data []byte, doRender bool) int {
 			for end > 0 && data[end-1] == '\n' {
 				end--
 			}
-			p.r.BlockHtml(data[:end])
+			block := p.addBlock(HtmlBlock, data[:end])
 			finalizeHtmlBlock(block)
 		}
 		return size
 	}
@ -441,7 +470,7 @@ func (p *parser) htmlHr(data []byte, doRender bool) int {
 				for end > 0 && data[end-1] == '\n' {
 					end--
 				}
-				p.r.BlockHtml(data[:end])
+				finalizeHtmlBlock(p.addBlock(HtmlBlock, data[:end]))
 			}
 			return size
 		}
@ -464,7 +493,9 @@ func (p *parser) htmlFindTag(data []byte) (string, bool) {
 func (p *parser) htmlFindEnd(tag string, data []byte) int {
 	// assume data[0] == '<' && data[1] == '/' already tested
-
+	if tag == "hr" {
 		return 2
 	}
 	// check if tag is a match
 	closetag := []byte("</" + tag + ">")
 	if !bytes.HasPrefix(data, closetag) {
@ -642,6 +673,10 @@ func (p *parser) fencedCode(data []byte, doRender bool) int {
 	}
 	var work bytes.Buffer
 	if lang != nil {
 		work.Write([]byte(*lang))
 		work.WriteByte('\n')
 	}
 	for {
 		// safe to assume beg < len(data)
@ -668,48 +703,76 @@ func (p *parser) fencedCode(data []byte, doRender bool) int {
 		beg = end
 	}
-	syntax := ""
+	//syntax := ""
-	if lang != nil {
+	//if lang != nil {
-		syntax = *lang
+	//	syntax = *lang
-	}
+	//}
 	if doRender {
-		p.r.BlockCode(work.Bytes(), syntax)
+		block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
 		block.IsFenced = true
 		finalizeCodeBlock(block)
 	}
 	return beg
 }
 // unescapeChar decodes a single escape sequence. A backslash escape yields
 // the byte that follows the backslash; anything else is passed through
 // html.UnescapeString. The input must be non-empty, and a backslash escape
 // must be at least two bytes long.
 func unescapeChar(str []byte) []byte {
 	if str[0] != '\\' {
 		decoded := html.UnescapeString(string(str))
 		return []byte(decoded)
 	}
 	return []byte{str[1]}
 }
 // unescapeString replaces every match of reEntityOrEscapedChar in str with
 // its unescaped form. Input that does not match the reBackslashOrAmp
 // pre-check is returned unchanged, without copying.
 func unescapeString(str []byte) []byte {
 	if !reBackslashOrAmp.Match(str) {
 		return str
 	}
 	return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
 }
 // finalizeCodeBlock converts a code block's raw content into its final
 // fields and then clears content.
 //
 // For a fenced block, everything up to the first newline is unescaped and
 // stored as Info, and the remainder becomes Literal. Content without any
 // newline is treated as an info line with an empty body rather than being
 // sliced with a negative index, which would panic.
 //
 // For a non-fenced block, trailing newline/space runs are collapsed into a
 // single newline and the result is stored as Literal.
 func finalizeCodeBlock(block *Node) {
 	if block.IsFenced {
 		firstLine := block.content
 		var rest []byte
 		// IndexByte returns -1 when there is no newline; only split when one exists.
 		if newlinePos := bytes.IndexByte(block.content, '\n'); newlinePos >= 0 {
 			firstLine = block.content[:newlinePos]
 			rest = block.content[newlinePos+1:]
 		}
 		block.Info = unescapeString(firstLine)
 		block.Literal = rest
 	} else {
 		block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{'\n'})
 	}
 	block.content = nil
 }
 func (p *parser) table(data []byte) int {
-	var header bytes.Buffer
+	table := p.addBlock(Table, nil)
-	i, columns := p.tableHeader(&header, data)
+	i, columns := p.tableHeader(data)
 	if i == 0 {
 		p.tip = table.Parent
 		table.unlink()
 		return 0
 	}
-	var body bytes.Buffer
+	p.addBlock(TableBody, nil)
-	body.Write(p.r.CaptureWrites(func() {
+	for i < len(data) {
-		for i < len(data) {
+		pipes, rowStart := 0, i
-			pipes, rowStart := 0, i
+		for ; data[i] != '\n'; i++ {
-			for ; data[i] != '\n'; i++ {
+			if data[i] == '|' {
-				if data[i] == '|' {
+				pipes++
 					pipes++
 				}
 			}
 			if pipes == 0 {
 				i = rowStart
 				break
 			}
 			// include the newline in data sent to tableRow
 			i++
 			p.tableRow(data[rowStart:i], columns, false)
 		}
 	}))
-	p.r.Table(header.Bytes(), body.Bytes(), columns)
+		if pipes == 0 {
 			i = rowStart
 			break
 		}
 		// include the newline in data sent to tableRow
 		i++
 		p.tableRow(data[rowStart:i], columns, false)
 	}
 	return i
 }
@ -723,7 +786,7 @@ func isBackslashEscaped(data []byte, i int) bool {
 	return backslashes&1 == 1
 }
-func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
+func (p *parser) tableHeader(data []byte) (size int, columns []int) {
 	i := 0
 	colCount := 1
 	for i = 0; data[i] != '\n'; i++ {
@ -821,16 +884,15 @@ func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns
 		return
 	}
-	out.Write(p.r.CaptureWrites(func() {
+	p.addBlock(TableHead, nil)
-		p.tableRow(header, columns, true)
+	p.tableRow(header, columns, true)
 	}))
 	size = i + 1
 	return
 }
 func (p *parser) tableRow(data []byte, columns []int, header bool) {
 	p.addBlock(TableRow, nil)
 	i, col := 0, 0
 	var rowWork bytes.Buffer
 	if data[i] == '|' && !isBackslashEscaped(data, i) {
 		i++
@ -856,29 +918,19 @@ func (p *parser) tableRow(data []byte, columns []int, header bool) {
 			cellEnd--
 		}
-		cellWork := p.r.CaptureWrites(func() {
+		cell := p.addBlock(TableCell, data[cellStart:cellEnd])
-			p.inline(data[cellStart:cellEnd])
+		cell.IsHeader = header
-		})
+		cell.Align = columns[col]
 		if header {
 			p.r.TableHeaderCell(&rowWork, cellWork, columns[col])
 		} else {
 			p.r.TableCell(&rowWork, cellWork, columns[col])
 		}
 	}
 	// pad it out with empty columns to get the right number
 	for ; col < len(columns); col++ {
-		if header {
+		cell := p.addBlock(TableCell, nil)
-			p.r.TableHeaderCell(&rowWork, nil, columns[col])
+		cell.IsHeader = header
-		} else {
+		cell.Align = columns[col]
 			p.r.TableCell(&rowWork, nil, columns[col])
 		}
 	}
 	// silently ignore rows with too many cells
 	p.r.TableRow(rowWork.Bytes())
 }
 // returns blockquote prefix length
@ -910,6 +962,7 @@ func (p *parser) terminateBlockquote(data []byte, beg, end int) bool {
 // parse a blockquote fragment
 func (p *parser) quote(data []byte) int {
 	block := p.addBlock(BlockQuote, nil)
 	var raw bytes.Buffer
 	beg, end := 0, 0
 	for beg < len(data) {
@ -928,22 +981,18 @@ func (p *parser) quote(data []byte) int {
 			end++
 		}
 		end++
 		if pre := p.quotePrefix(data[beg:]); pre > 0 {
 			// skip the prefix
 			beg += pre
 		} else if p.terminateBlockquote(data, beg, end) {
 			break
 		}
 		// this line is part of the blockquote
 		raw.Write(data[beg:end])
 		beg = end
 	}
-
+	p.block(raw.Bytes())
-	p.r.BlockQuote(p.r.CaptureWrites(func() {
+	p.finalize(block)
 		p.block(raw.Bytes())
 	}))
 	return end
 }
@ -995,7 +1044,9 @@ func (p *parser) code(data []byte) int {
 	work.WriteByte('\n')
-	p.r.BlockCode(work.Bytes(), "")
+	block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
 	block.IsFenced = false
 	finalizeCodeBlock(block)
 	return i
 }
@ -1057,10 +1108,19 @@ func (p *parser) dliPrefix(data []byte) int {
 func (p *parser) list(data []byte, flags ListType) int {
 	i := 0
 	flags |= ListItemBeginningOfList
-	p.r.BeginList(flags)
+	block := p.addBlock(List, nil)
 	block.ListData = &ListData{ // TODO: fill in the real ListData
 		Flags:      flags,
 		Tight:      true,
 		BulletChar: '*',
 		Delimiter:  0,
 	}
 	for i < len(data) {
 		skip := p.listItem(data[i:], &flags)
 		if flags&ListItemContainsBlock != 0 {
 			block.ListData.Tight = false
 		}
 		i += skip
 		if skip == 0 || flags&ListItemEndOfList != 0 {
 			break
@ -1068,10 +1128,53 @@ func (p *parser) list(data []byte, flags ListType) int {
 		flags &= ^ListItemBeginningOfList
 	}
-	p.r.EndList(flags)
+	above := block.Parent
 	finalizeList(block)
 	p.tip = above
 	return i
 }
 // Returns true if block ends with a blank line, descending if needed
 // into lists and sublists.
 func endsWithBlankLine(block *Node) bool {
 	// TODO: figure this out. Always false now. The intended check, returning
 	// true as soon as a visited node has lastLineBlank set, is not wired in.
 	for node := block; node != nil; node = node.LastChild {
 		// Only lists and list items are descended into.
 		if node.Type != List && node.Type != Item {
 			break
 		}
 	}
 	return false
 }
 // finalizeList marks a list block as closed and clears ListData.Tight when a
 // blank line separates its items or the children of an item.
 func finalizeList(block *Node) {
 	block.open = false
 	for item := block.FirstChild; item != nil; item = item.Next {
 		// A non-final item that ends with a blank line makes the list loose;
 		// nothing further needs checking.
 		if item.Next != nil && endsWithBlankLine(item) {
 			block.ListData.Tight = false
 			break
 		}
 		// Look at the item's own children for blank lines between them. A
 		// trailing blank line only counts if something follows it, either a
 		// sibling child or another list item.
 		for child := item.FirstChild; child != nil; child = child.Next {
 			hasFollowing := item.Next != nil || child.Next != nil
 			if hasFollowing && endsWithBlankLine(child) {
 				block.ListData.Tight = false
 				break
 			}
 		}
 	}
 }
 // Parse a single list item.
 // Assumes initial prefix is already removed if this is a sublist.
 func (p *parser) listItem(data []byte, flags *ListType) int {
@ -1223,44 +1326,34 @@ gatherlines:
 	rawBytes := raw.Bytes()
 	block := p.addBlock(Item, nil)
 	block.ListData = &ListData{ // TODO: fill in the real ListData
 		Flags:      *flags,
 		Tight:      false,
 		BulletChar: '*',
 		Delimiter:  0,
 	}
 	// render the contents of the list item
 	var cooked bytes.Buffer
 	if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
 		// intermediate render of block item, except for definition term
 		if sublist > 0 {
-			cooked.Write(p.r.CaptureWrites(func() {
+			p.block(rawBytes[:sublist])
-				p.block(rawBytes[:sublist])
+			p.block(rawBytes[sublist:])
 				p.block(rawBytes[sublist:])
 			}))
 		} else {
-			cooked.Write(p.r.CaptureWrites(func() {
+			p.block(rawBytes)
 				p.block(rawBytes)
 			}))
 		}
 	} else {
 		// intermediate render of inline item
 		if sublist > 0 {
-			cooked.Write(p.r.CaptureWrites(func() {
+			child := p.addChild(Paragraph, 0)
-				p.inline(rawBytes[:sublist])
+			child.content = rawBytes[:sublist]
-				p.block(rawBytes[sublist:])
+			p.block(rawBytes[sublist:])
 			}))
 		} else {
-			cooked.Write(p.r.CaptureWrites(func() {
+			child := p.addChild(Paragraph, 0)
-				p.inline(rawBytes)
+			child.content = rawBytes
 			}))
 		}
 	}
 	// render the actual list item
 	cookedBytes := cooked.Bytes()
 	parsedEnd := len(cookedBytes)
 	// strip trailing newlines
 	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
 		parsedEnd--
 	}
 	p.r.ListItem(cookedBytes[:parsedEnd], *flags)
 	return line
 }
@ -1284,9 +1377,7 @@ func (p *parser) renderParagraph(data []byte) {
 		end--
 	}
-	p.r.BeginParagraph()
+	p.addBlock(Paragraph, data[beg:end])
 	p.inline(data[beg:end])
 	p.r.EndParagraph()
 }
 func (p *parser) paragraph(data []byte) int {
@ -1335,11 +1426,9 @@ func (p *parser) paragraph(data []byte) int {
 					id = sanitized_anchor_name.Create(string(data[prev:eol]))
 				}
-				p.r.BeginHeader(level, id)
+				block := p.addBlock(Header, data[prev:eol])
-				header := p.r.CopyWrites(func() {
+				block.Level = uint32(level)
-					p.inline(data[prev:eol])
+				block.HeaderID = id
 				})
 				p.r.EndHeader(level, id, header)
 				// find the end of the underline
 				for data[i] != '\n' {
--- a/markdown.go
+++ b/markdown.go
@ -228,6 +228,12 @@ type parser struct {
 	// presence. If a ref is also a footnote, it's stored both in refs and here
 	// in notes. Slice is nil if footnotes not enabled.
 	notes []*reference
 	doc                  *Node
 	tip                  *Node // = doc
 	oldTip               *Node
 	lastMatchedContainer *Node // = doc
 	allClosed            bool
 }
 func (p *parser) getRef(refid string) (ref *reference, found bool) {
@ -250,6 +256,34 @@ func (p *parser) getRef(refid string) (ref *reference, found bool) {
 	return ref, found
 }
 // finalize marks block as closed and moves the parser's tip to the block's
 // parent.
 func (p *parser) finalize(block *Node) {
 	parent := block.Parent
 	block.open = false
 	p.tip = parent
 }
 // addChild creates a node of the given type with empty content, appends it
 // under the nearest container that can hold it and makes it the new tip.
 // Containers that cannot hold the node type are finalized on the way up.
 // The offset parameter is currently unused.
 func (p *parser) addChild(node NodeType, offset uint32) *Node {
 	// Walk up from the tip, closing every block that cannot contain the node.
 	for !p.tip.canContain(node) {
 		p.finalize(p.tip)
 	}
 	child := NewNode(node)
 	child.content = []byte{}
 	p.tip.appendChild(child)
 	p.tip = child
 	return child
 }
 // closeUnmatchedBlocks finalizes every block from oldTip up to, but not
 // including, lastMatchedContainer, then records that all blocks are closed.
 // It does nothing when allClosed is already set.
 func (p *parser) closeUnmatchedBlocks() {
 	if p.allClosed {
 		return
 	}
 	for p.oldTip != p.lastMatchedContainer {
 		// Capture the parent before finalizing the current block.
 		next := p.oldTip.Parent
 		p.finalize(p.oldTip)
 		p.oldTip = next
 	}
 	p.allClosed = true
 }
 //
 //
 // Public interface
@ -366,6 +400,13 @@ func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
 	p.maxNesting = 16
 	p.insideLink = false
 	docNode := NewNode(Document)
 	p.doc = docNode
 	p.tip = docNode
 	p.oldTip = docNode
 	p.lastMatchedContainer = docNode
 	p.allClosed = true
 	// register inline parsers
 	p.inlineCallback['*'] = emphasis
 	p.inlineCallback['_'] = emphasis