mirror of https://github.com/status-im/NimYAML.git
Bug fixes in lexer and parser
* Lexer: column now always points to start of token
* Lexer: added ability to compile with -d:yamlDebug for debug output
* Parser: correctly handle nested sequences and maps
* Tests: added parser tests for nested objects
parent 73ce41bbef
commit e642e41f90
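The parser changes target nested block structures (a map inside a sequence item, a sequence under a map key, and sequences inside sequences), which the new tests at the end of this diff exercise through the events iterator. A minimal usage sketch follows, assuming the parser module is imported as "yaml" (the actual module name is not shown in this diff) and reusing one of the new test inputs; compiling with -d:yamlDebug additionally prints the lexer tokens introduced below:

    import streams
    import yaml   # assumed import name; adjust to the actual module layout

    # Drive the closure iterator events() over a sequence nested in a map,
    # the same input as the new "Parsing: Sequence in Map" test. Each event's
    # kind field is an enum (yamlStartMap, yamlScalar, ...) and prints via echo.
    let input = newStringStream("key:\n - item1\n - item2")
    for event in events(input):
        echo event.kind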
@@ -145,6 +145,12 @@ proc open*(my: var YamlLexer, input: Stream) =
     my.column = 0
 
 template yieldToken(mKind: YamlLexerTokenKind) {.dirty.} =
+    when defined(yamlDebug):
+        if mKind == yamlScalar:
+            echo "Lexer token: yamlScalar(\"", my.content, "\")"
+        else:
+            echo "Lexer token: ", mKind
+
     yield (kind: mKind)
     my.content = ""
 
@@ -157,13 +163,13 @@ template handleCR() {.dirty.} =
     my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) + my.charlen -
             my.charoffset - 1
     my.line.inc()
-    my.column = 0
+    curPos = 0
 
 template handleLF() {.dirty.} =
     my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) +
             my.charlen - my.charoffset - 1
     my.line.inc()
-    my.column = 0
+    curPos = 0
 
 template `or`(r: Rune, i: int): Rune =
     cast[Rune](cast[int](r) or i)
@@ -192,6 +198,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
         blockScalarIndentation = -1
             # when parsing a block scalar, this will be set to the indentation
             # of the line that starts the flow scalar.
+        curPos = 0
 
     while true:
         let c = my.buf[my.bufpos + my.charoffset]
@@ -225,10 +232,12 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
         of ylInitialContent:
             case c
             of '-':
+                my.column = 0
                 state = ylDashes
                 continue
             of '.':
                 yieldToken(yamlLineStart)
+                my.column = 0
                 state = ylDots
                 continue
             else:
@@ -253,6 +262,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
                 my.content = ""
                 yieldToken(yamlLineStart)
                 my.content = tmp
+                my.column = curPos
                 state = ylPlainScalar
                 continue
             else:
@@ -489,6 +499,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
 
         of ylInitialInLine:
             if lastSpecialChar != '\0':
+                my.column = curPos - 1
                 case c
                 of ' ', '\t', '\r', '\x0A', EndOfFile:
                     case lastSpecialChar
@@ -518,6 +529,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
                 else:
                     my.content.add(lastSpecialChar)
                     lastSpecialChar = '\0'
+                    my.column = curPos - 1
                     state = ylPlainScalar
                     continue
             case c
@@ -529,6 +541,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
                     yieldToken(yamlComma)
                 else:
                     my.content = "" & c
+                    my.column = curPos
                     state = ylPlainScalar
             of '[':
                 inc(flowDepth)
@@ -547,8 +560,10 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
             of '#':
                 lastSpecialChar = '#'
             of '"':
+                my.column = curPos
                 state = ylDoublyQuotedScalar
             of '\'':
+                my.column = curPos
                 state = ylSingleQuotedScalar
             of '!':
                 lastSpecialChar = '!'
@@ -563,6 +578,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
                     lastSpecialChar = '-'
                 else:
                     my.content = "" & c
+                    my.column = curPos
                     state = ylPlainScalar
             of '?', ':':
                 lastSpecialChar = c
@@ -576,6 +592,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
                     discard
             else:
                 my.content = "" & c
+                my.column = curPos
                 state = ylPlainScalar
         of ylComment, ylDirectiveComment:
             case c
@@ -817,4 +834,4 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
             my.content.add(c)
 
         my.bufpos += my.charlen
-        my.column.inc
+        curPos.inc
@@ -87,6 +87,7 @@ template closeLevelsByIndicator() {.dirty.} =
             closeLevel()
         elif level.indicatorColumn == -1:
             if levels[levels.high - 1].indicatorColumn >= lex.column:
+                echo "seq ind col: ", levels[levels.high - 1].indicatorColumn, ", lex.column: ", lex.column
                 closeLevel()
             else:
                 break
@@ -310,6 +311,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
                 cachedScalar = YamlParserEvent(kind: yamlScalar,
                         scalarAnchor: nil, scalarTag: nil,
                         scalarContent: lex.content)
+                cachedScalarIndentation = lex.column
                 state = ylBlockAfterScalar
             of yamlStreamEnd:
                 closeAllLevels()
@@ -331,7 +333,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
                     levels.add(DocumentLevel(kind: lUnknown))
                     level = addr(levels[levels.high])
                 level.kind = lMap
-                level.indicatorColumn = lex.column
+                level.indicatorColumn = cachedScalarIndentation
                 level.readKey = true
                 yield YamlParserEvent(kind: yamlStartMap)
                 yield cachedScalar
@@ -340,6 +342,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
                 cachedScalar = nil
                 state = ylBlockAfterColon
             of yamlLineStart:
+                yield cachedScalar
                 state = ylBlockLineStart
             of yamlStreamEnd:
                 yield cachedScalar
@@ -378,5 +381,4 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
                             $token.kind)
             else:
                 discard
 
         token = nextToken(lex)
@@ -89,3 +89,15 @@ suite "Parsing":
     test "Parsing: Simple Map":
         ensure("key: value", startDoc(), startMap(), scalar("key"),
                scalar("value"), endMap(), endDoc())
+    test "Parsing: Map in Sequence":
+        ensure(" - key: value", startDoc(), startSequence(), startMap(),
+               scalar("key"), scalar("value"), endMap(), endSequence(),
+               endDoc())
+    test "Parsing: Sequence in Map":
+        ensure("key:\n - item1\n - item2", startDoc(), startMap(),
+               scalar("key"), startSequence(), scalar("item1"), scalar("item2"),
+               endSequence(), endMap(), endDoc())
+    test "Parsing: Sequence in Sequence":
+        ensure("- - l1_i1\n - l1_i2\n- l2_i1", startDoc(), startSequence(),
+               startSequence(), scalar("l1_i1"), scalar("l1_i2"), endSequence(),
+               scalar("l2_i1"), endSequence(), endDoc())