Properly handle multiline scalars

* single-quoted scalars: proper line folding
* plain scalars: allow tabs before continuation lines
This commit is contained in:
Felix Krause 2016-03-17 21:25:30 +01:00
parent 961a8dba81
commit 91cc16cde7
1 changed files with 49 additions and 22 deletions

View File

@ -526,19 +526,6 @@ template documentEnd(lexer: var BaseLexer, isDocumentEnd: var bool) =
else: isDocumentEnd = false else: isDocumentEnd = false
else: isDocumentEnd = false else: isDocumentEnd = false
template singleQuotedScalar(lexer: BaseLexer, content: var string) =
  ## Lexes a single-quoted scalar and appends its text to `content`.
  ##
  ## On entry `lexer.bufpos` is expected to sit on the opening quote, which
  ## is skipped. A doubled quote (`''`) contributes one literal `'`; a lone
  ## quote terminates the scalar and leaves `lexer.bufpos` on the character
  ## right after it. Every other character, including whitespace and line
  ## breaks, is copied to `content` unchanged. Hitting `EndOfFile` before
  ## the closing quote is reported through `lexerError`.
  debug("lex: singleQuotedScalar")
  inc(lexer.bufpos)  # step over the opening quote
  while true:
    case lexer.buf[lexer.bufpos]
    of EndOfFile:
      lexerError(lexer, "Unfinished single quoted string")
    of '\'':
      # Look at the character after the quote to tell `''` from the end.
      inc(lexer.bufpos)
      if lexer.buf[lexer.bufpos] != '\'': break
      content.add('\'')
    else:
      content.add(lexer.buf[lexer.bufpos])
    inc(lexer.bufpos)
proc unicodeSequence(lexer: var BaseLexer, length: int): proc unicodeSequence(lexer: var BaseLexer, length: int):
string {.raises: [YamlParserError].} = string {.raises: [YamlParserError].} =
debug("lex: unicodeSequence") debug("lex: unicodeSequence")
@ -549,7 +536,7 @@ proc unicodeSequence(lexer: var BaseLexer, length: int):
digitPosition = length - i - 1 digitPosition = length - i - 1
c = lexer.buf[lexer.bufpos] c = lexer.buf[lexer.bufpos]
case c case c
of EndOFFile, '\l', '\r': of EndOFFile, '\l', '\c':
lexerError(lexer, "Unfinished unicode escape sequence") lexerError(lexer, "Unfinished unicode escape sequence")
of '0' .. '9': of '0' .. '9':
unicodeChar = unicodechar or unicodeChar = unicodechar or
@ -583,7 +570,7 @@ proc byteSequence(lexer: var BaseLexer): char {.raises: [YamlParserError].} =
else: lexerError(lexer, "Invalid character in octet escape sequence") else: lexerError(lexer, "Invalid character in octet escape sequence")
return char(charCode) return char(charCode)
template processDoubleQuotedWhitespace(newlines: var int) {.dirty.} = template processQuotedWhitespace(newlines: var int) {.dirty.} =
var after = "" var after = ""
block outer: block outer:
while true: while true:
@ -637,7 +624,7 @@ template doubleQuotedScalar(lexer: BaseLexer, content: var string) =
of 'n': content.add('\l') of 'n': content.add('\l')
of 'v': content.add('\v') of 'v': content.add('\v')
of 'f': content.add('\f') of 'f': content.add('\f')
of 'r': content.add('\r') of 'r': content.add('\c')
of 'e': content.add('\e') of 'e': content.add('\e')
of ' ': content.add(' ') of ' ': content.add(' ')
of '"': content.add('"') of '"': content.add('"')
@ -652,7 +639,7 @@ template doubleQuotedScalar(lexer: BaseLexer, content: var string) =
of 'U': content.add(lexer.unicodeSequence(8)) of 'U': content.add(lexer.unicodeSequence(8))
of '\l', '\c': of '\l', '\c':
var newlines = 0 var newlines = 0
processDoubleQuotedWhitespace(newlines) processQuotedWhitespace(newlines)
continue continue
else: lexerError(lexer, "Illegal character in escape sequence") else: lexerError(lexer, "Illegal character in escape sequence")
of '"': of '"':
@ -660,12 +647,29 @@ template doubleQuotedScalar(lexer: BaseLexer, content: var string) =
break break
of '\l', '\c', '\t', ' ': of '\l', '\c', '\t', ' ':
var newlines = 1 var newlines = 1
processdoubleQuotedWhitespace(newlines) processQuotedWhitespace(newlines)
continue continue
else: else:
content.add(c) content.add(c)
lexer.bufpos.inc() lexer.bufpos.inc()
template singleQuotedScalar(lexer: BaseLexer, content: var string) =
  ## Lexes a single-quoted scalar and appends its text to `content`.
  ##
  ## On entry `lexer.bufpos` is expected to sit on the opening quote, which
  ## is skipped. A doubled quote (`''`) contributes one literal `'`; a lone
  ## quote terminates the scalar and leaves `lexer.bufpos` on the character
  ## right after it. Runs of spaces, tabs and line breaks are delegated to
  ## `processQuotedWhitespace`, starting with a newline count of 1. Hitting
  ## `EndOfFile` before the closing quote is reported through `lexerError`.
  debug("lex: singleQuotedScalar")
  inc(lexer.bufpos)  # step over the opening quote
  while true:
    case lexer.buf[lexer.bufpos]
    of EndOfFile:
      lexerError(lexer, "Unfinished single quoted string")
    of '\'':
      # Look at the character after the quote to tell `''` from the end.
      inc(lexer.bufpos)
      if lexer.buf[lexer.bufpos] != '\'': break
      content.add('\'')
    of ' ', '\t', '\l', '\c':
      var newlines = 1
      processQuotedWhitespace(newlines)
      # The trailing increment is skipped here; presumably the helper has
      # already advanced the buffer position itself -- confirm in its body.
      continue
    else:
      content.add(lexer.buf[lexer.bufpos])
    inc(lexer.bufpos)
proc isPlainSafe(lexer: BaseLexer, index: int, context: YamlContext): bool = proc isPlainSafe(lexer: BaseLexer, index: int, context: YamlContext): bool =
case lexer.buf[lexer.bufpos + 1] case lexer.buf[lexer.bufpos + 1]
of spaceOrLineEnd: result = false of spaceOrLineEnd: result = false
@ -1150,14 +1154,26 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
p.lexer.plainScalar(content, cBlock) p.lexer.plainScalar(content, cBlock)
state = fpBlockAfterPlainScalar state = fpBlockAfterPlainScalar
of ' ': of ' ':
let c = p.lexer.buf[p.lexer.bufpos]
p.lexer.skipIndentation() p.lexer.skipIndentation()
if p.lexer.buf[p.lexer.bufpos] in if c in {'\l', '\c', '#', EndOfFile}:
{'\t', '\l', '\c', '#', EndOfFile}:
p.lexer.lineEnding() p.lexer.lineEnding()
handleLineEnd(true) handleLineEnd(true)
elif c == '\t':
indentation = p.lexer.getColNumber(p.lexer.bufpos)
p.lexer.bufpos.inc()
while p.lexer.buf[p.lexer.bufpos] in {'\t', ' '}:
p.lexer.bufpos.inc()
if p.lexer.buf[p.lexer.bufpos] in {'\l', '\c', '#', EndOfFile}:
p.lexer.lineEnding()
handleLineEnd(true)
else:
closeMoreIndentedLevels(true)
if level.kind == fplScalar: state = fpBlockContinueScalar
else: lexerError(p.lexer, "tabular not allowed here")
else: else:
indentation = p.lexer.getColNumber(p.lexer.bufpos) indentation = p.lexer.getColNumber(p.lexer.bufpos)
if p.lexer.buf[p.lexer.bufpos] == '-' and not if c == '-' and not
p.lexer.isPlainSafe(p.lexer.bufpos + 1, if flowdepth == 0: p.lexer.isPlainSafe(p.lexer.bufpos + 1, if flowdepth == 0:
cBlock else: cFlow): cBlock else: cFlow):
closeMoreIndentedLevels(true) closeMoreIndentedLevels(true)
@ -1171,7 +1187,18 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
of EndOfFile: of EndOfFile:
closeEverything() closeEverything()
break break
of '\t', '\l', '\c', '#': of '\t':
indentation = 0
p.lexer.bufpos.inc()
while p.lexer.buf[p.lexer.bufpos] in {'\t', ' '}: p.lexer.bufpos.inc()
if p.lexer.buf[p.lexer.bufpos] in {'\l', '\c', '#', EndOfFile}:
p.lexer.lineEnding()
handleLineEnd(true)
else:
closeMoreIndentedLevels(true)
if level.kind == fplScalar: state = fpBlockContinueScalar
else: lexerError(p.lexer, "tabular not allowed here")
of '\l', '\c', '#':
p.lexer.lineEnding() p.lexer.lineEnding()
handleLineEnd(true) handleLineEnd(true)
else: else: