Fixed line break handling

This commit is contained in:
Felix Krause 2015-11-29 16:50:27 +01:00
parent 9cba968c1e
commit f793247a41
2 changed files with 102 additions and 34 deletions

View File

@ -47,8 +47,8 @@ type
ylMinorVersion, ylUnknownDirectiveParam, ylDirectiveComment, ylMinorVersion, ylUnknownDirectiveParam, ylDirectiveComment,
# scalar reading states # scalar reading states
ylPlainScalar, ylSingleQuotedScalar, ylDoublyQuotedScalar, ylPlainScalar, ylSingleQuotedScalar, ylDoublyQuotedScalar,
ylEscape, ylBlockScalar, ylEscape, ylBlockScalar, ylBlockScalarHeader,
ylBlockScalarHeader, ylSpaceAfterPlainScalar, ylSpaceAfterQuotedScalar,
# indentation # indentation
ylIndentation, ylIndentation,
# comments # comments
@ -152,12 +152,10 @@ template yieldChar(c: char) {.dirty.} =
template handleCR() {.dirty.} = template handleCR() {.dirty.} =
my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) + my.charlen - my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) + my.charlen -
my.charoffset - 1 my.charoffset - 1
continue
template handleLF() {.dirty.} = template handleLF() {.dirty.} =
my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) + my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) +
my.charlen - my.charoffset - 1 my.charlen - my.charoffset - 1
continue
template `or`(r: Rune, i: int): Rune = template `or`(r: Rune, i: int): Rune =
cast[Rune](cast[int](r) or i) cast[Rune](cast[int](r) or i)
@ -170,6 +168,8 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
escapeLength = 0 escapeLength = 0
expectedEscapeLength = 0 expectedEscapeLength = 0
trailingSpace = ""
# used to temporarily store whitespace after a plain scalar
lastSpecialChar: char = '\0' lastSpecialChar: char = '\0'
# stores chars that behave differently dependent on the following # stores chars that behave differently dependent on the following
# char. handling will be deferred to next loop iteration. # char. handling will be deferred to next loop iteration.
@ -209,6 +209,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
my.content.add(c) my.content.add(c)
of '#': of '#':
state = ylDirectiveComment state = ylDirectiveComment
my.content = ""
of EndOfFile, '\r', '\x0A': of EndOfFile, '\r', '\x0A':
state = ylDirectiveLineEnd state = ylDirectiveLineEnd
continue continue
@ -264,9 +265,11 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
of '\r': of '\r':
handleCR() handleCR()
state = ylInitial state = ylInitial
continue
of '\x0A': of '\x0A':
handleLF() handleLF()
state = ylInitial state = ylInitial
continue
of EndOfFile: of EndOfFile:
yieldToken(yamlStreamEnd) yieldToken(yamlStreamEnd)
break break
@ -286,9 +289,10 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
yieldToken(yamlStreamEnd) yieldToken(yamlStreamEnd)
break break
else: else:
yieldError("Internal error! Please report this bug.") yieldError("Internal error: Unexpected char at line end: " & c)
state = ylInitialContent state = ylInitialContent
position = 0 position = 0
continue
of ylSingleQuotedScalar: of ylSingleQuotedScalar:
if lastSpecialChar != '\0': if lastSpecialChar != '\0':
# ' is the only special char # ' is the only special char
@ -304,7 +308,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
else: else:
yieldToken(yamlScalar) yieldToken(yamlScalar)
lastSpecialChar = '\0' lastSpecialChar = '\0'
state = if flowDepth > 0: ylFlow else: ylBlock state = ylSpaceAfterQuotedScalar
continue continue
else: else:
case c case c
@ -320,7 +324,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
case c case c
of '"': of '"':
yieldToken(yamlScalar) yieldToken(yamlScalar)
state = if flowDepth > 0: ylFlow else: ylBlock state = ylSpaceAfterQuotedScalar
of EndOfFile: of EndOfFile:
yieldError("Unterminated doubly quoted string") yieldError("Unterminated doubly quoted string")
yieldToken(yamlStreamEnd) yieldToken(yamlStreamEnd)
@ -390,24 +394,32 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
my.content.add(toUTF8(unicodeChar)) my.content.add(toUTF8(unicodeChar))
state = ylDoublyQuotedScalar state = ylDoublyQuotedScalar
# State entered right after a quoted scalar has been yielded. It collects any
# whitespace that follows the closing quote so that a subsequent '#' can be
# checked for proper separation before it is treated as a comment start.
of ylSpaceAfterQuotedScalar:
  case c
  of ' ', '\t':
    # remember the separating whitespace; nothing is emitted for it
    trailingSpace.add(c)
  of '#':
    # A comment must be separated from the preceding scalar by whitespace.
    # The error is therefore reported when NO whitespace was collected
    # (the previous check `> 0` was inverted and flagged the valid case).
    if trailingSpace.len == 0:
      yieldError("Missing space before comment start")
    # NOTE(review): indentation was lost in this view; the transition to
    # ylComment is assumed to happen regardless of the error - confirm.
    state = ylComment
    trailingSpace = ""
  else:
    # any other char belongs to flow/block content; re-process it there
    trailingSpace = ""
    state = if flowDepth > 0: ylFlow else: ylBlock
    continue
of ylPlainScalar: of ylPlainScalar:
if lastSpecialChar != '\0':
case c
of ' ', '\t', EndOfFile, '\r', '\x0A':
yieldToken(yamlScalar)
state = if flowDepth > 0: ylFlow else: ylBlock
continue
else:
my.content.add(lastSpecialChar)
lastSpecialChar = '\0'
case c case c
of EndOfFile, '\r', '\x0A': of EndOfFile, '\r', '\x0A':
yieldToken(yamlScalar) yieldToken(yamlScalar)
state = ylLineEnd state = ylLineEnd
continue continue
of ':', '#': of ':':
lastSpecialChar = c lastSpecialChar = c
state = ylSpaceAfterPlainScalar
of ' ':
state = ylSpaceAfterPlainScalar
continue
of ',': of ',':
if flowDepth > 0: lastSpecialChar = c if flowDepth > 0: lastSpecialChar = c
else: my.content.add(c) else: my.content.add(c)
@ -417,6 +429,49 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
continue continue
else: else:
my.content.add(c) my.content.add(c)
# State used while inside a plain scalar after whitespace or a possibly
# special char has been seen. Whitespace is buffered in `trailingSpace`
# instead of being appended to `my.content` right away, so it can be dropped
# when the scalar turns out to end here, or appended when more scalar
# content follows.
# NOTE(review): indentation was stripped in this view, so the exact nesting
# of the statements below cannot be confirmed from here.
of ylSpaceAfterPlainScalar:
# A char whose meaning depends on its successor was deferred in
# `lastSpecialChar`; decide now, based on the current char `c`.
if lastSpecialChar != '\0':
case c
of ' ', '\t', EndOfFile, '\r', '\x0A':
# the scalar is yielded and lexing goes on in flow or block state
yieldToken(yamlScalar)
state = if flowDepth > 0: ylFlow else: ylBlock
else:
# the buffered whitespace and the deferred char are appended to the
# scalar content
my.content.add(trailingSpace)
my.content.add(lastSpecialChar)
lastSpecialChar = '\0'
trailingSpace = ""
state = ylPlainScalar
continue
# No deferred char pending: classify the current char.
case c
of EndOfFile, '\r', '\x0A':
# line or stream end: buffered whitespace is discarded, scalar is yielded
trailingSpace = ""
yieldToken(yamlScalar)
state = ylLineEnd
continue
of ' ', '\t':
trailingSpace.add(c)
of ',':
# ',' is deferred only when flowDepth > 0; otherwise it is appended to the
# content together with the buffered whitespace
if flowDepth > 0:
lastSpecialChar = c
else:
my.content.add(trailingSpace)
my.content.add(c)
trailingSpace = ""
state = ylPlainScalar
of ':', '#':
# handling is deferred to the next loop iteration via lastSpecialChar
lastSpecialChar = c
of '[', ']', '{', '}':
# the scalar is yielded; the char itself is re-processed in flow/block state
yieldToken(yamlScalar)
trailingSpace = ""
state = if flowDepth > 0: ylFlow else: ylBlock
continue
else:
# ordinary char: buffered whitespace and the char are appended to the content
my.content.add(trailingSpace)
my.content.add(c)
trailingSpace = ""
state = ylPlainScalar
of ylFlow, ylBlock: of ylFlow, ylBlock:
if lastSpecialChar != '\0': if lastSpecialChar != '\0':
@ -446,12 +501,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
state = ylPlainScalar state = ylPlainScalar
continue continue
case c case c
of EndOfFile: of '\r', '\x0A', EndOfFile:
if state == ylFlow:
yieldError("Unterminated flow content")
state = ylLineEnd
continue
of '\r', '\x0A':
state = ylLineEnd state = ylLineEnd
continue continue
of ',': of ',':
@ -551,11 +601,8 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
yieldToken(yamlTagHandle) yieldToken(yamlTagHandle)
my.content = suffix my.content = suffix
yieldToken(yamlTagSuffix) yieldToken(yamlTagSuffix)
if c in ['\r', '\x0A', EndOfFile]: state = if flowDepth > 0: ylFlow else: ylBlock
state = ylLineEnd continue
continue
else:
state = if flowDepth > 0: ylFlow else: ylBlock
else: else:
yieldError("Invalid character in tag handle: " & c) yieldError("Invalid character in tag handle: " & c)
my.content = "" my.content = ""
@ -567,11 +614,8 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
my.content.add(c) my.content.add(c)
of ' ', '\t', EndOfFile, '\r', '\x0A': of ' ', '\t', EndOfFile, '\r', '\x0A':
yieldToken(yamlTagSuffix) yieldToken(yamlTagSuffix)
if c in ['\r', '\x0A', EndOfFile]: state = if flowDepth > 0: ylFlow else: ylBlock
state = ylLineEnd continue
continue
else:
state = if flowDepth > 0: ylFlow else: ylBlock
else: else:
yieldError("Invalid character in tag suffix: " & c) yieldError("Invalid character in tag suffix: " & c)
state = if flowDepth > 0: ylFlow else: ylBlock state = if flowDepth > 0: ylFlow else: ylBlock
@ -619,6 +663,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
continue continue
else: else:
state = ylUnknownDirectiveParam state = ylUnknownDirectiveParam
continue
of ylUnknownDirectiveParam: of ylUnknownDirectiveParam:
case c case c
of '\r', '\x0A', EndOfFile, ' ', '\t': of '\r', '\x0A', EndOfFile, ' ', '\t':

View File

@ -44,6 +44,23 @@ suite "Lexing":
t(yamlTagURI, "tag:http://example.com/"), t(yamlTagURI, "tag:http://example.com/"),
t(yamlStreamEnd, nil)]) t(yamlStreamEnd, nil)])
# Expects the directive name of an unknown directive and each of its two
# space-separated parameters to be reported as separate tokens, followed by
# the stream end.
test "Unknown directive":
ensure("%FOO bar baz", [t(yamlUnknownDirective, "%FOO"),
t(yamlUnknownDirectiveParam, "bar"),
t(yamlUnknownDirectiveParam, "baz"),
t(yamlStreamEnd, nil)])
# Expects a comment trailing a %YAML directive, a comment at line start and
# an indented comment to each be reported as a yamlComment token whose
# content is the text after the '#', and a directive on the following line
# to still be recognized.
test "Comments after directives":
ensure("%YAML 1.2 # version\n# at line start\n # indented\n%FOO",
[t(yamlYamlDirective, nil),
t(yamlMajorVersion, "1"),
t(yamlMinorVersion, "2"),
t(yamlComment, " version"),
t(yamlComment, " at line start"),
t(yamlComment, " indented"),
t(yamlUnknownDirective, "%FOO"),
t(yamlStreamEnd, nil)])
test "Directives end": test "Directives end":
ensure("---", [t(yamlDirectivesEnd, nil), ensure("---", [t(yamlDirectivesEnd, nil),
t(yamlStreamEnd, nil)]) t(yamlStreamEnd, nil)])
@ -68,6 +85,12 @@ suite "Lexing":
t(yamlScalar, ":test ?content -with #special !chars"), t(yamlScalar, ":test ?content -with #special !chars"),
t(yamlStreamEnd, nil)]) t(yamlStreamEnd, nil)])
# Expects a line starting with '%' that follows the directives end marker
# to be reported as a plain scalar rather than as a directive.
test "Plain Scalar (starting with %)":
ensure("---\n%test", [t(yamlDirectivesEnd, nil),
t(yamlLineStart, ""),
t(yamlScalar, "%test"),
t(yamlStreamEnd, nil)])
test "Single Quoted Scalar": test "Single Quoted Scalar":
ensure("'? test - content! '", [t(yamlLineStart, nil), ensure("'? test - content! '", [t(yamlLineStart, nil),
t(yamlScalar, "? test - content! "), t(yamlScalar, "? test - content! "),