Bug fixes in lexer and parser

* Lexer: the column now always points to the start of the token
* Lexer: added the ability to compile with -d:yamlDebug for debug output
* Parser: correctly handle nested sequences and maps
* Tests: added parser tests for nested objects
This commit is contained in:
Felix Krause 2015-12-10 20:53:43 +01:00
parent 73ce41bbef
commit e642e41f90
3 changed files with 37 additions and 6 deletions

View File

@ -145,6 +145,12 @@ proc open*(my: var YamlLexer, input: Stream) =
my.column = 0 my.column = 0
template yieldToken(mKind: YamlLexerTokenKind) {.dirty.} = template yieldToken(mKind: YamlLexerTokenKind) {.dirty.} =
when defined(yamlDebug):
if mKind == yamlScalar:
echo "Lexer token: yamlScalar(\"", my.content, "\")"
else:
echo "Lexer token: ", mKind
yield (kind: mKind) yield (kind: mKind)
my.content = "" my.content = ""
@ -157,13 +163,13 @@ template handleCR() {.dirty.} =
my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) + my.charlen - my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) + my.charlen -
my.charoffset - 1 my.charoffset - 1
my.line.inc() my.line.inc()
my.column = 0 curPos = 0
template handleLF() {.dirty.} = template handleLF() {.dirty.} =
my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) + my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) +
my.charlen - my.charoffset - 1 my.charlen - my.charoffset - 1
my.line.inc() my.line.inc()
my.column = 0 curPos = 0
template `or`(r: Rune, i: int): Rune = template `or`(r: Rune, i: int): Rune =
cast[Rune](cast[int](r) or i) cast[Rune](cast[int](r) or i)
@ -192,6 +198,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
blockScalarIndentation = -1 blockScalarIndentation = -1
# when parsing a block scalar, this will be set to the indentation # when parsing a block scalar, this will be set to the indentation
# of the line that starts the flow scalar. # of the line that starts the flow scalar.
curPos = 0
while true: while true:
let c = my.buf[my.bufpos + my.charoffset] let c = my.buf[my.bufpos + my.charoffset]
@ -225,10 +232,12 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylInitialContent: of ylInitialContent:
case c case c
of '-': of '-':
my.column = 0
state = ylDashes state = ylDashes
continue continue
of '.': of '.':
yieldToken(yamlLineStart) yieldToken(yamlLineStart)
my.column = 0
state = ylDots state = ylDots
continue continue
else: else:
@ -253,6 +262,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content = "" my.content = ""
yieldToken(yamlLineStart) yieldToken(yamlLineStart)
my.content = tmp my.content = tmp
my.column = curPos
state = ylPlainScalar state = ylPlainScalar
continue continue
else: else:
@ -489,6 +499,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylInitialInLine: of ylInitialInLine:
if lastSpecialChar != '\0': if lastSpecialChar != '\0':
my.column = curPos - 1
case c case c
of ' ', '\t', '\r', '\x0A', EndOfFile: of ' ', '\t', '\r', '\x0A', EndOfFile:
case lastSpecialChar case lastSpecialChar
@ -518,6 +529,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
else: else:
my.content.add(lastSpecialChar) my.content.add(lastSpecialChar)
lastSpecialChar = '\0' lastSpecialChar = '\0'
my.column = curPos - 1
state = ylPlainScalar state = ylPlainScalar
continue continue
case c case c
@ -529,6 +541,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
yieldToken(yamlComma) yieldToken(yamlComma)
else: else:
my.content = "" & c my.content = "" & c
my.column = curPos
state = ylPlainScalar state = ylPlainScalar
of '[': of '[':
inc(flowDepth) inc(flowDepth)
@ -547,8 +560,10 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of '#': of '#':
lastSpecialChar = '#' lastSpecialChar = '#'
of '"': of '"':
my.column = curPos
state = ylDoublyQuotedScalar state = ylDoublyQuotedScalar
of '\'': of '\'':
my.column = curPos
state = ylSingleQuotedScalar state = ylSingleQuotedScalar
of '!': of '!':
lastSpecialChar = '!' lastSpecialChar = '!'
@ -563,6 +578,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
lastSpecialChar = '-' lastSpecialChar = '-'
else: else:
my.content = "" & c my.content = "" & c
my.column = curPos
state = ylPlainScalar state = ylPlainScalar
of '?', ':': of '?', ':':
lastSpecialChar = c lastSpecialChar = c
@ -576,6 +592,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
discard discard
else: else:
my.content = "" & c my.content = "" & c
my.column = curPos
state = ylPlainScalar state = ylPlainScalar
of ylComment, ylDirectiveComment: of ylComment, ylDirectiveComment:
case c case c
@ -817,4 +834,4 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content.add(c) my.content.add(c)
my.bufpos += my.charlen my.bufpos += my.charlen
my.column.inc curPos.inc

View File

@ -87,6 +87,7 @@ template closeLevelsByIndicator() {.dirty.} =
closeLevel() closeLevel()
elif level.indicatorColumn == -1: elif level.indicatorColumn == -1:
if levels[levels.high - 1].indicatorColumn >= lex.column: if levels[levels.high - 1].indicatorColumn >= lex.column:
echo "seq ind col: ", levels[levels.high - 1].indicatorColumn, ", lex.column: ", lex.column
closeLevel() closeLevel()
else: else:
break break
@ -310,6 +311,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
cachedScalar = YamlParserEvent(kind: yamlScalar, cachedScalar = YamlParserEvent(kind: yamlScalar,
scalarAnchor: nil, scalarTag: nil, scalarAnchor: nil, scalarTag: nil,
scalarContent: lex.content) scalarContent: lex.content)
cachedScalarIndentation = lex.column
state = ylBlockAfterScalar state = ylBlockAfterScalar
of yamlStreamEnd: of yamlStreamEnd:
closeAllLevels() closeAllLevels()
@ -331,7 +333,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
levels.add(DocumentLevel(kind: lUnknown)) levels.add(DocumentLevel(kind: lUnknown))
level = addr(levels[levels.high]) level = addr(levels[levels.high])
level.kind = lMap level.kind = lMap
level.indicatorColumn = lex.column level.indicatorColumn = cachedScalarIndentation
level.readKey = true level.readKey = true
yield YamlParserEvent(kind: yamlStartMap) yield YamlParserEvent(kind: yamlStartMap)
yield cachedScalar yield cachedScalar
@ -340,6 +342,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
cachedScalar = nil cachedScalar = nil
state = ylBlockAfterColon state = ylBlockAfterColon
of yamlLineStart: of yamlLineStart:
yield cachedScalar
state = ylBlockLineStart state = ylBlockLineStart
of yamlStreamEnd: of yamlStreamEnd:
yield cachedScalar yield cachedScalar
@ -378,5 +381,4 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
$token.kind) $token.kind)
else: else:
discard discard
token = nextToken(lex) token = nextToken(lex)

View File

@ -89,3 +89,15 @@ suite "Parsing":
test "Parsing: Simple Map": test "Parsing: Simple Map":
ensure("key: value", startDoc(), startMap(), scalar("key"), ensure("key: value", startDoc(), startMap(), scalar("key"),
scalar("value"), endMap(), endDoc()) scalar("value"), endMap(), endDoc())
test "Parsing: Map in Sequence":
ensure(" - key: value", startDoc(), startSequence(), startMap(),
scalar("key"), scalar("value"), endMap(), endSequence(),
endDoc())
test "Parsing: Sequence in Map":
ensure("key:\n - item1\n - item2", startDoc(), startMap(),
scalar("key"), startSequence(), scalar("item1"), scalar("item2"),
endSequence(), endMap(), endDoc())
test "Parsing: Sequence in Sequence":
ensure("- - l1_i1\n - l1_i2\n- l2_i1", startDoc(), startSequence(),
startSequence(), scalar("l1_i1"), scalar("l1_i2"), endSequence(),
scalar("l2_i1"), endSequence(), endDoc())