Bug fixes in lexer and parser

* Lexer: column now always points to the start of the token
* Lexer: added ability to compile with -d:yamlDebug for debug output
* Parser: correctly handle nested sequences and maps
* Tests: added parser tests for nested objects
This commit is contained in:
Felix Krause 2015-12-10 20:53:43 +01:00
parent 73ce41bbef
commit e642e41f90
3 changed files with 37 additions and 6 deletions

View File

@ -145,6 +145,12 @@ proc open*(my: var YamlLexer, input: Stream) =
my.column = 0
# Yields a token tuple of kind `mKind` and then resets `my.content` to the
# empty string. Declared `dirty`, so `my` is resolved at the expansion site.
template yieldToken(mKind: YamlLexerTokenKind) {.dirty.} =
# Debug trace: only compiled in when the `yamlDebug` symbol is defined
# (e.g. via -d:yamlDebug).
when defined(yamlDebug):
if mKind == yamlScalar:
# Scalar tokens are echoed together with their collected content.
echo "Lexer token: yamlScalar(\"", my.content, "\")"
else:
echo "Lexer token: ", mKind
yield (kind: mKind)
my.content = ""
@ -157,13 +163,13 @@ template handleCR() {.dirty.} =
my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) + my.charlen -
my.charoffset - 1
my.line.inc()
my.column = 0
curPos = 0
# Handles a line feed: delegates to `lexbase.handleLF`, recomputes
# `my.bufpos` from the returned position and the current character
# offset/length, and increments `my.line`.
# NOTE(review): this view is a diff rendering without +/- markers, so both
# `my.column = 0` and `curPos = 0` are listed; presumably the commit replaces
# the former with the latter -- confirm against the repository.
template handleLF() {.dirty.} =
my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) +
my.charlen - my.charoffset - 1
my.line.inc()
my.column = 0
curPos = 0
# Bitwise-ORs the integer value of rune `r` with `i` and returns the result
# cast back to a `Rune`.
template `or`(r: Rune, i: int): Rune =
cast[Rune](cast[int](r) or i)
@ -192,6 +198,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
blockScalarIndentation = -1
# when parsing a block scalar, this will be set to the indentation
# of the line that starts the flow scalar.
curPos = 0
while true:
let c = my.buf[my.bufpos + my.charoffset]
@ -225,10 +232,12 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylInitialContent:
case c
of '-':
my.column = 0
state = ylDashes
continue
of '.':
yieldToken(yamlLineStart)
my.column = 0
state = ylDots
continue
else:
@ -253,6 +262,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content = ""
yieldToken(yamlLineStart)
my.content = tmp
my.column = curPos
state = ylPlainScalar
continue
else:
@ -489,6 +499,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylInitialInLine:
if lastSpecialChar != '\0':
my.column = curPos - 1
case c
of ' ', '\t', '\r', '\x0A', EndOfFile:
case lastSpecialChar
@ -518,6 +529,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
else:
my.content.add(lastSpecialChar)
lastSpecialChar = '\0'
my.column = curPos - 1
state = ylPlainScalar
continue
case c
@ -529,6 +541,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
yieldToken(yamlComma)
else:
my.content = "" & c
my.column = curPos
state = ylPlainScalar
of '[':
inc(flowDepth)
@ -547,8 +560,10 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of '#':
lastSpecialChar = '#'
of '"':
my.column = curPos
state = ylDoublyQuotedScalar
of '\'':
my.column = curPos
state = ylSingleQuotedScalar
of '!':
lastSpecialChar = '!'
@ -563,6 +578,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
lastSpecialChar = '-'
else:
my.content = "" & c
my.column = curPos
state = ylPlainScalar
of '?', ':':
lastSpecialChar = c
@ -576,6 +592,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
discard
else:
my.content = "" & c
my.column = curPos
state = ylPlainScalar
of ylComment, ylDirectiveComment:
case c
@ -817,4 +834,4 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content.add(c)
my.bufpos += my.charlen
my.column.inc
curPos.inc

View File

@ -87,6 +87,7 @@ template closeLevelsByIndicator() {.dirty.} =
closeLevel()
elif level.indicatorColumn == -1:
if levels[levels.high - 1].indicatorColumn >= lex.column:
echo "seq ind col: ", levels[levels.high - 1].indicatorColumn, ", lex.column: ", lex.column
closeLevel()
else:
break
@ -310,6 +311,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
cachedScalar = YamlParserEvent(kind: yamlScalar,
scalarAnchor: nil, scalarTag: nil,
scalarContent: lex.content)
cachedScalarIndentation = lex.column
state = ylBlockAfterScalar
of yamlStreamEnd:
closeAllLevels()
@ -331,7 +333,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
levels.add(DocumentLevel(kind: lUnknown))
level = addr(levels[levels.high])
level.kind = lMap
level.indicatorColumn = lex.column
level.indicatorColumn = cachedScalarIndentation
level.readKey = true
yield YamlParserEvent(kind: yamlStartMap)
yield cachedScalar
@ -340,6 +342,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
cachedScalar = nil
state = ylBlockAfterColon
of yamlLineStart:
yield cachedScalar
state = ylBlockLineStart
of yamlStreamEnd:
yield cachedScalar
@ -378,5 +381,4 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
$token.kind)
else:
discard
token = nextToken(lex)

View File

@ -88,4 +88,16 @@ suite "Parsing":
endSequence(), endDoc())
test "Parsing: Simple Map":
ensure("key: value", startDoc(), startMap(), scalar("key"),
scalar("value"), endMap(), endDoc())
scalar("value"), endMap(), endDoc())
# A sequence item that itself contains a `key: value` pair: the expected
# event stream opens a map inside the sequence and closes both in order.
test "Parsing: Map in Sequence":
ensure(" - key: value", startDoc(), startSequence(), startMap(),
scalar("key"), scalar("value"), endMap(), endSequence(),
endDoc())
# A map key whose value is a two-item sequence written on the following
# lines: the expected events nest the sequence between the key and the end
# of the map.
test "Parsing: Sequence in Map":
ensure("key:\n - item1\n - item2", startDoc(), startMap(),
scalar("key"), startSequence(), scalar("item1"), scalar("item2"),
endSequence(), endMap(), endDoc())
# The first outer item is a nested sequence (l1_i1, l1_i2); the second outer
# item is the plain scalar l2_i1. The inner sequence must be closed before
# l2_i1 is emitted.
test "Parsing: Sequence in Sequence":
ensure("- - l1_i1\n - l1_i2\n- l2_i1", startDoc(), startSequence(),
startSequence(), scalar("l1_i1"), scalar("l1_i2"), endSequence(),
scalar("l2_i1"), endSequence(), endDoc())