finished removing redundant end-of-buffer checks in block parsing; code cleanup

This commit is contained in:
Russ Ross 2011-07-04 18:56:29 -06:00
parent e35b4b66cc
commit fb435fe2e3
2 changed files with 138 additions and 128 deletions

264
block.go
View File

@ -861,50 +861,51 @@ func (parser *Parser) blockCodePrefix(data []byte) int {
return 0 return 0
} }
// TODO: continue redundant end-of-buffer check removal here
func (parser *Parser) blockCode(out *bytes.Buffer, data []byte) int { func (parser *Parser) blockCode(out *bytes.Buffer, data []byte) int {
var work bytes.Buffer var work bytes.Buffer
beg, end := 0, 0 i := 0
for beg < len(data) { for i < len(data) {
for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ { beg := i
for data[i] != '\n' {
i++
} }
i++
if pre := parser.blockCodePrefix(data[beg:end]); pre > 0 { blankline := parser.isEmpty(data[beg:i]) > 0
if pre := parser.blockCodePrefix(data[beg:i]); pre > 0 {
beg += pre beg += pre
} else { } else {
if parser.isEmpty(data[beg:end]) == 0 { if !blankline {
// non-empty non-prefixed line breaks the pre // non-empty, non-prefixed line breaks the pre
i = beg
break break
} }
} }
if beg < end { // verbatim copy to the working buffer
// verbatim copy to the working buffer, escaping entities if blankline {
if parser.isEmpty(data[beg:end]) > 0 { work.WriteByte('\n')
work.WriteByte('\n') } else {
} else { work.Write(data[beg:i])
work.Write(data[beg:end])
}
} }
beg = end
} }
// trim all the \n off the end of work // trim all the \n off the end of work
workbytes := work.Bytes() workbytes := work.Bytes()
n := 0 eol := len(workbytes)
for len(workbytes) > n && workbytes[len(workbytes)-n-1] == '\n' { for eol > 0 && workbytes[eol-1] == '\n' {
n++ eol--
} }
if n > 0 { if eol != len(workbytes) {
work.Truncate(len(workbytes) - n) work.Truncate(eol)
} }
work.WriteByte('\n') work.WriteByte('\n')
parser.r.BlockCode(out, work.Bytes(), "") parser.r.BlockCode(out, work.Bytes(), "")
return beg return i
} }
// returns unordered list item prefix // returns unordered list item prefix
@ -912,13 +913,12 @@ func (parser *Parser) blockUliPrefix(data []byte) int {
i := 0 i := 0
// start with up to 3 spaces // start with up to 3 spaces
for i < len(data) && i < 3 && data[i] == ' ' { for i < 3 && data[i] == ' ' {
i++ i++
} }
// need a *, +, or - followed by a space/tab // need a *, +, or - followed by a space/tab
if i+1 >= len(data) || if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
(data[i] != '*' && data[i] != '+' && data[i] != '-') ||
data[i+1] != ' ' { data[i+1] != ' ' {
return 0 return 0
} }
@ -930,18 +930,18 @@ func (parser *Parser) blockOliPrefix(data []byte) int {
i := 0 i := 0
// start with up to 3 spaces // start with up to 3 spaces
for i < len(data) && i < 3 && data[i] == ' ' { for i < 3 && data[i] == ' ' {
i++ i++
} }
// count the digits // count the digits
start := i start := i
for i < len(data) && data[i] >= '0' && data[i] <= '9' { for data[i] >= '0' && data[i] <= '9' {
i++ i++
} }
// we need >= 1 digits followed by a dot and a space/tab // we need >= 1 digits followed by a dot and a space/tab
if start == i || data[i] != '.' || i+1 >= len(data) || data[i+1] != ' ' { if start == i || data[i] != '.' || data[i+1] != ' ' {
return 0 return 0
} }
return i + 2 return i + 2
@ -952,12 +952,11 @@ func (parser *Parser) blockList(out *bytes.Buffer, data []byte, flags int) int {
i := 0 i := 0
flags |= LIST_ITEM_BEGINNING_OF_LIST flags |= LIST_ITEM_BEGINNING_OF_LIST
work := func() bool { work := func() bool {
j := 0
for i < len(data) { for i < len(data) {
j = parser.blockListItem(out, data[i:], &flags) skip := parser.blockListItem(out, data[i:], &flags)
i += j i += skip
if j == 0 || flags&LIST_ITEM_END_OF_LIST != 0 { if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
break break
} }
flags &= ^LIST_ITEM_BEGINNING_OF_LIST flags &= ^LIST_ITEM_BEGINNING_OF_LIST
@ -969,164 +968,174 @@ func (parser *Parser) blockList(out *bytes.Buffer, data []byte, flags int) int {
return i return i
} }
// parse a single list item // Parse a single list item.
// assumes initial prefix is already removed // Assumes initial prefix is already removed if this is a sublist.
func (parser *Parser) blockListItem(out *bytes.Buffer, data []byte, flags *int) int { func (parser *Parser) blockListItem(out *bytes.Buffer, data []byte, flags *int) int {
// keep track of the first indentation prefix // keep track of the indentation of the first line
beg, end, pre, sublist, orgpre, i := 0, 0, 0, 0, 0, 0 itemIndent := 0
for itemIndent < 3 && data[itemIndent] == ' ' {
for orgpre < 3 && orgpre < len(data) && data[orgpre] == ' ' { itemIndent++
orgpre++
} }
beg = parser.blockUliPrefix(data) i := parser.blockUliPrefix(data)
if beg == 0 { if i == 0 {
beg = parser.blockOliPrefix(data) i = parser.blockOliPrefix(data)
} }
if beg == 0 { if i == 0 {
return 0 return 0
} }
// skip leading whitespace on first line // skip leading whitespace on first line
for beg < len(data) && data[beg] == ' ' { for data[i] == ' ' {
beg++ i++
} }
// skip to the beginning of the following line // find the end of the line
end = beg line := i
for end < len(data) && data[end-1] != '\n' { for data[i-1] != '\n' {
end++ i++
} }
// get working buffers // get working buffer
var rawItem bytes.Buffer var raw bytes.Buffer
var parsed bytes.Buffer
// put the first line into the working buffer // put the first line into the working buffer
rawItem.Write(data[beg:end]) raw.Write(data[line:i])
beg = end line = i
// process the following lines // process the following lines
containsBlankLine, containsBlock := false, false containsBlankLine := false
for beg < len(data) { sublist := 0
end++
for end < len(data) && data[end-1] != '\n' { loop:
end++ for line < len(data) {
i++
// find the end of this line
for data[i-1] != '\n' {
i++
} }
// process an empty line // if it is an empty line, guess that it is part of this item
if parser.isEmpty(data[beg:end]) > 0 { // and move on to the next line
if parser.isEmpty(data[line:i]) > 0 {
containsBlankLine = true containsBlankLine = true
beg = end line = i
continue continue
} }
// calculate the indentation // calculate the indentation
i = 0 indent := 0
for i < 4 && beg+i < end && data[beg+i] == ' ' { for indent < 4 && line+indent < i && data[line+indent] == ' ' {
i++ indent++
} }
pre = i chunk := data[line+indent : i]
chunk := data[beg+i : end]
// evaluate how this line fits in
switch {
// is this a nested list item?
case (parser.blockUliPrefix(chunk) > 0 && !parser.isHRule(chunk)) ||
parser.blockOliPrefix(chunk) > 0:
// check for a nested list item
if (parser.blockUliPrefix(chunk) > 0 && !parser.isHRule(chunk)) ||
parser.blockOliPrefix(chunk) > 0 {
if containsBlankLine { if containsBlankLine {
containsBlock = true *flags |= LIST_ITEM_CONTAINS_BLOCK
} }
// the following item must have the same indentation // to be a nested list, it must be indented more
if pre == orgpre { // if not, it is the next item in the same list
break if indent <= itemIndent {
break loop
} }
// is this the first item in the nested list?
if sublist == 0 { if sublist == 0 {
sublist = rawItem.Len() sublist = raw.Len()
} }
} else {
// how about a nested prefix header? // is this a nested prefix header?
if parser.isPrefixHeader(chunk) { case parser.isPrefixHeader(chunk):
// only nest headers that are indented // if the header is not indented, it is not nested in the list
if containsBlankLine && i < 4 { // and thus ends the list
*flags |= LIST_ITEM_END_OF_LIST if containsBlankLine && indent < 4 {
break *flags |= LIST_ITEM_END_OF_LIST
} break loop
containsBlock = true
} else {
// only join stuff after empty lines when indented
if containsBlankLine && i < 4 {
*flags |= LIST_ITEM_END_OF_LIST
break
} else {
if containsBlankLine {
rawItem.WriteByte('\n')
containsBlock = true
}
}
} }
*flags |= LIST_ITEM_CONTAINS_BLOCK
// anything following an empty line is only part
// of this item if it is indented 4 spaces
// (regardless of the indentation of the beginning of the item)
case containsBlankLine && indent < 4:
*flags |= LIST_ITEM_END_OF_LIST
break loop
// a blank line means this should be parsed as a block
case containsBlankLine:
raw.WriteByte('\n')
*flags |= LIST_ITEM_CONTAINS_BLOCK
} }
containsBlankLine = false containsBlankLine = false
// add the line into the working buffer without prefix // add the line into the working buffer without prefix
rawItem.Write(data[beg+i : end]) raw.Write(data[line+indent : i])
beg = end line = i
} }
// render li contents // render the contents of the list item
if containsBlock { rawBytes := raw.Bytes()
*flags |= LIST_ITEM_CONTAINS_BLOCK var cooked bytes.Buffer
}
rawItemBytes := rawItem.Bytes()
if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 { if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
// intermediate render of block li // intermediate render of block li
if sublist > 0 && sublist < len(rawItemBytes) { if sublist > 0 {
parser.parseBlock(&parsed, rawItemBytes[:sublist]) parser.parseBlock(&cooked, rawBytes[:sublist])
parser.parseBlock(&parsed, rawItemBytes[sublist:]) parser.parseBlock(&cooked, rawBytes[sublist:])
} else { } else {
parser.parseBlock(&parsed, rawItemBytes) parser.parseBlock(&cooked, rawBytes)
} }
} else { } else {
// intermediate render of inline li // intermediate render of inline li
if sublist > 0 && sublist < len(rawItemBytes) { if sublist > 0 {
parser.parseInline(&parsed, rawItemBytes[:sublist]) parser.parseInline(&cooked, rawBytes[:sublist])
parser.parseBlock(&parsed, rawItemBytes[sublist:]) parser.parseBlock(&cooked, rawBytes[sublist:])
} else { } else {
parser.parseInline(&parsed, rawItemBytes) parser.parseInline(&cooked, rawBytes)
} }
} }
// render li itself // render the actual list item
parsedBytes := parsed.Bytes() cookedBytes := cooked.Bytes()
parsedEnd := len(parsedBytes) parsedEnd := len(cookedBytes)
for parsedEnd > 0 && parsedBytes[parsedEnd-1] == '\n' {
// strip trailing newlines
for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
parsedEnd-- parsedEnd--
} }
parser.r.ListItem(out, parsedBytes[:parsedEnd], *flags) parser.r.ListItem(out, cookedBytes[:parsedEnd], *flags)
return beg return line
} }
// render a single paragraph that has already been parsed out // render a single paragraph that has already been parsed out
func (parser *Parser) renderParagraph(out *bytes.Buffer, data []byte) { func (parser *Parser) renderParagraph(out *bytes.Buffer, data []byte) {
// trim leading whitespace if len(data) == 0 {
return
}
// trim leading spaces
beg := 0 beg := 0
for beg < len(data) && isspace(data[beg]) { for data[beg] == ' ' {
beg++ beg++
} }
// trim trailing whitespace // trim trailing newline
end := len(data) end := len(data) - 1
for end > beg && isspace(data[end-1]) {
// trim trailing spaces
for end > beg && data[end-1] == ' ' {
end-- end--
} }
if end == beg {
return
}
work := func() bool { work := func() bool {
parser.parseInline(out, data[beg:end]) parser.parseInline(out, data[beg:end])
@ -1180,7 +1189,8 @@ func (parser *Parser) blockParagraph(out *bytes.Buffer, data []byte) int {
parser.r.Header(out, work, level) parser.r.Header(out, work, level)
// find the end of the underline // find the end of the underline
for ; i < len(data) && data[i] != '\n'; i++ { for data[i] != '\n' {
i++
} }
return i return i
} }
@ -1202,10 +1212,10 @@ func (parser *Parser) blockParagraph(out *bytes.Buffer, data []byte) int {
} }
// otherwise, scan to the beginning of the next line // otherwise, scan to the beginning of the next line
i++ for data[i] != '\n' {
for i < len(data) && data[i-1] != '\n' {
i++ i++
} }
i++
} }
parser.renderParagraph(out, data[:i]) parser.renderParagraph(out, data[:i])

View File

@ -56,7 +56,7 @@ func doTestsReference(t *testing.T, files []string) {
basename+".text", expected, actual) basename+".text", expected, actual)
} }
// now test every substring of every input to check for // now test every prefix of every input to check for
// bounds checking // bounds checking
if !testing.Short() { if !testing.Short() {
start := 0 start := 0