Added parsing tests; got basic parsing working

This commit is contained in:
Felix Krause 2015-12-07 22:09:57 +01:00
parent ada4a36e6e
commit 73ce41bbef
4 changed files with 317 additions and 59 deletions

View File

@ -1,6 +1,6 @@
import streams, tables, strutils
import private.lexer
import "private/lexer"
type
YamlParserEventKind* = enum
@ -27,7 +27,8 @@ type
column* : int
YamlParserState = enum
ylInitial, ylSkipDirective, ylBlock, ylFlow
ylInitial, ylSkipDirective, ylBlockLineStart, ylBlockAfterScalar,
ylBlockAfterColon, ylBlockLineEnd, ylFlow
OutcomeEnum = enum
oOkay, oWarn, oContinue
@ -41,6 +42,25 @@ type
readKey: bool
anchor: string
tag: string
proc `==`*(left: YamlParserEvent, right: YamlParserEvent): bool =
  ## Structural equality for parser events.
  ##
  ## Two events are equal when they have the same `kind` and every field
  ## belonging to that kind matches:
  ##
  ## * document start/end, map end, sequence end: the kind alone decides
  ## * map/sequence start: `objAnchor` and `objTag`
  ## * scalar: `scalarAnchor`, `scalarTag` and `scalarContent`
  ## * alias: `aliasName`
  ## * error/warning: `description`, `line` and `column`
  ##
  ## Events are references, so either operand may be nil. Two nil events
  ## compare equal; a nil event never equals a non-nil one.
  # `isNil` is used instead of `== nil` so that this overload is not
  # re-entered while checking for nil.
  if left.isNil or right.isNil:
    return left.isNil and right.isNil
  if left.kind != right.kind:
    return false
  case left.kind
  of yamlStartDocument, yamlEndDocument, yamlEndMap, yamlEndSequence:
    result = true
  of yamlStartMap, yamlStartSequence:
    result = left.objAnchor == right.objAnchor and
             left.objTag == right.objTag
  of yamlScalar:
    result = left.scalarAnchor == right.scalarAnchor and
             left.scalarTag == right.scalarTag and
             left.scalarContent == right.scalarContent
  of yamlAlias:
    result = left.aliasName == right.aliasName
  of yamlError, yamlWarning:
    result = left.description == right.description and
             left.line == right.line and left.column == right.column
template yieldWarning(d: string) {.dirty.} =
yield YamlParserEvent(kind: yamlWarning, description: d,
@ -50,23 +70,11 @@ template yieldError(d: string) {.dirty.} =
yield YamlParserEvent(kind: yamlError, description: d,
line: lex.line, column: lex.column)
template tag(): string {.dirty.} =
  ## Evaluates to the tag of the `level` visible at the expansion site.
  ##
  ## When `level.tag` is nil, a default is chosen from `level.kind`:
  ## `"!!str"` for `lUnknown`, `"!!seq"` for `lSequence` and `"!!map"` for
  ## `lMap`. Otherwise `level.tag` itself is used.
  ##
  ## The body is a single expression. A template has no `result` of its
  ## own, and a `return` inside it would leave the routine that expands it,
  ## so neither may be used here.
  if isNil(level.tag):
    case level.kind
    of lUnknown:
      "!!str"
    of lSequence:
      "!!seq"
    of lMap:
      "!!map"
  else:
    level.tag
template closeLevel() {.dirty.} =
case level.kind
of lUnknown:
yield YamlParserEvent(kind: yamlScalar, scalarAnchor: level.anchor,
scalarTag: tag(), scalarContent: "")
scalarTag: level.tag, scalarContent: "")
of lSequence:
yield YamlParserEvent(kind: yamlEndSequence)
of lMap:
@ -77,21 +85,33 @@ template closeLevelsByIndicator() {.dirty.} =
let level = levels[levels.high]
if level.indicatorColumn > lex.column:
closeLevel()
elif level.indicatorColumn == -1:
if levels[levels.high - 1].indicatorColumn >= lex.column:
closeLevel()
else:
break
else:
break
levels.pop()
discard levels.pop()
iterator events*(input: Stream): YamlParserEvent =
template closeAllLevels() {.dirty.} =
  ## Empties `levels`, taking the most recently added entry first and
  ## expanding `closeLevel` once for each entry removed.
  ##
  ## The template is dirty: `levels` comes from the expansion site, and the
  ## `level` bound here is the one `closeLevel` reads.
  while levels.len > 0:
    let level = levels.pop()
    closeLevel()
iterator events*(input: Stream): YamlParserEvent {.closure.} =
var
state = ylInitial
lex : YamlLexer
foundYamlDirective = false
tagShorthands = initTable[string, string]()
levels = initSeq[DocumentLevel]()
levels = newSeq[DocumentLevel]()
curIndentation: int
cachedScalar: YamlParserEvent
cachedScalarIndentation: int
lex.open(input)
var nextToken = lexer.tokens
var nextToken = tokens
var token = nextToken(lex)
while not finished(nextToken):
case state
@ -157,34 +177,37 @@ iterator events*(input: Stream): YamlParserEvent =
discard
of yamlDirectivesEnd:
yield YamlParserEvent(kind: yamlStartDocument)
state = ylLineStart
of yamlDocumentEnd:
state = ylBlockLinestart
of yamlDocumentEnd, yamlStreamEnd:
yield YamlParserEvent(kind: yamlStartDocument)
yield YamlParserEvent(kind: yamlEndDocument)
else:
yield YamlParserEvent(kind: yamlStartDocument)
state = ylLineStart
state = ylBlockLineStart
continue
of ylSkipDirective:
if token.kind not in [yamlUnknownDirectiveParam, yamlTagHandle,
yamlTagURI, yamlVersionPart, yamlComment]:
if token.kind notin [yamlUnknownDirectiveParam, yamlTagHandle,
yamlTagURI, yamlVersionPart, yamlComment]:
state = ylInitial
continue
of ylBlock:
of ylBlockLineStart:
case token.kind
of yamlLineStart:
discard
of yamlDash:
closeLevelsByIndicator()
if levels.count > 0:
let level = levels[levels.high]
if levels.len > 0:
var level = addr(levels[levels.high])
if level.kind == lUnknown:
level.kind = lSequence
level.indicatorColumn = lex.column
levels.add(DocumentLevel(kind: lUnknown,
indicatorColumn = -1,
indicatorColumn: -1,
readKey: false,
anchor: nil, tag: nil))
yield YamlParserEvent(kind: yamlStartSequence,
objAnchor: level.anchor,
objTag: level.tag)
elif level.indicatorColumn < lex.column:
yieldError("Invalid indentation for '-'")
elif level.kind == lSequence:
@ -200,16 +223,160 @@ iterator events*(input: Stream): YamlParserEvent =
readKey: false,
anchor: nil, tag: nil))
levels.add(DocumentLevel(kind: lUnknown,
indicatorColmun: -1,
indicatorColumn: -1,
readKey: false,
anchor: nil, tag: nil))
of yamlQuestionmark:
yield YamlParserEvent(kind: yamlStartSequence,
objAnchor: nil, objTag: nil)
of yamlQuestionmark, yamlColon:
closeLevelsByIndicator()
if levels.len > 0:
var level = addr(levels[levels.high])
if level.kind == lUnknown:
level.kind = lMap
level.indicatorColumn = lex.column
levels.add(DocumentLevel(kind: lUnknown,
indicatorColumn: -1,
readKey: true,
anchor: nil, tag: nil))
yield YamlParserEvent(kind: yamlStartMap,
objAnchor: level.anchor,
objTag: level.tag)
if token.kind == yamlColon:
yield YamlParserEvent(kind: yamlScalar,
scalarAnchor: level.anchor,
scalarTag: level.tag,
scalarContent: "")
level.readKey = false
elif level.indicatorColumn < lex.column:
yieldError("Invalid indentation for '?'")
elif level.kind == lMap and level.readKey ==
(token.kind == yamlColon):
level.readKey = true
levels.add(DocumentLevel(kind: lUnknown,
indicatorColumn: -1,
readKey: (token.kind == yamlQuestionmark),
anchor: nil, tag: nil))
else:
yieldError("Unexpected token: '?'")
else:
levels.add(DocumentLevel(kind: lMap,
indicatorColumn: lex.column,
readKey: true,
anchor: nil, tag: nil))
var level = addr(levels[levels.high])
levels.add(DocumentLevel(kind: lUnknown,
indicatorColumn: -1,
readKey: false,
anchor: nil, tag: nil))
yield YamlParserEvent(kind: yamlStartMap,
objAnchor: nil,
objTag: nil)
if token.kind == yamlColon:
yield YamlParserEvent(kind: yamlScalar,
scalarAnchor: nil,
scalarTag: nil,
scalarContent: "")
level.readKey = false
of yamlTagHandle:
var level = addr(levels[levels.high])
let handle = lex.content
if tagShorthands.hasKey(handle):
token = nextToken(lex)
if finished(nextToken):
yieldError("Missing tag suffix")
continue
if token.kind != yamlTagSuffix:
yieldError("Missing tag suffix")
continue
level.tag = tagShorthands[handle] & lex.content
else:
yieldError("Unknown tag shorthand: " & handle)
of yamlVerbatimTag:
levels[levels.high].tag = lex.content
of lexer.yamlScalar:
closeLevelsByIndicator()
if levels.len > 0:
let level = levels.pop()
if level.kind != lUnknown:
yieldError("Unexpected scalar in " & $level.kind)
else:
cachedScalar = YamlParserEvent(kind: yamlScalar,
scalarAnchor: level.anchor,
scalarTag: level.tag,
scalarContent: lex.content)
cachedScalarIndentation = lex.column
else:
cachedScalar = YamlParserEvent(kind: yamlScalar,
scalarAnchor: nil, scalarTag: nil,
scalarContent: lex.content)
state = ylBlockAfterScalar
of yamlStreamEnd:
closeAllLevels()
yield YamlParserEvent(kind: yamlEndDocument)
break
of yamlDocumentEnd:
closeAllLevels()
yield YamlParserEvent(kind: yamlEndDocument)
state = ylInitial
else:
yieldError("Unexpected token: " & $token.kind)
of ylBlockAfterScalar:
case token.kind
of yamlColon:
var level: ptr DocumentLevel = nil
if levels.len > 0:
level = addr(levels[levels.high])
if level == nil or level.kind != lUnknown:
levels.add(DocumentLevel(kind: lUnknown))
level = addr(levels[levels.high])
level.kind = lMap
level.indicatorColumn = lex.column
level.readKey = true
yield YamlParserEvent(kind: yamlStartMap)
yield cachedScalar
levels.add(DocumentLevel(kind: lUnknown,
indicatorColumn: -1))
cachedScalar = nil
state = ylBlockAfterColon
of yamlLineStart:
state = ylBlockLineStart
of yamlStreamEnd:
yield cachedScalar
closeAllLevels()
yield YamlParserEvent(kind: yamlEndDocument)
break
else:
yieldError("Unexpected token: " & $token.kind)
of ylBlockAfterColon:
case token.kind
of lexer.yamlScalar:
var level = levels.pop()
yield YamlParserEvent(kind: yamlScalar,
scalarAnchor: level.anchor, scalarTag: level.tag,
scalarContent: lex.content)
state = ylBlockLineEnd
of yamlLineStart:
state = ylBlockLineStart
of yamlStreamEnd:
closeAllLevels()
yield YamlParserEvent(kind: yamlEndDocument)
break
else:
yieldError("Unexpected token (expected scalar or line end): " &
$token.kind)
of ylBlockLineEnd:
case token.kind
of yamlLineStart:
state = ylBlockLineStart
of yamlStreamEnd:
closeAllLevels()
yield YamlParserEvent(kind: yamlEndDocument)
break
else:
yieldError("Unexpected token (expected line end):" &
$token.kind)
else:
discard
token = nextToken(lex)

View File

@ -39,26 +39,26 @@ proc t(kind: YamlLexerTokenKind, content: string): BasicLexerToken =
(kind: kind, content: content)
suite "Lexing":
test "YAML Directive":
test "Lexing: YAML Directive":
ensure("%YAML 1.2", [t(yamlYamlDirective, nil),
t(yamlVersionPart, "1"),
t(yamlVersionPart, "2"),
t(yamlStreamEnd, nil)])
test "TAG Directive":
test "Lexing: TAG Directive":
ensure("%TAG !t! tag:http://example.com/",
[t(yamlTagDirective, nil),
t(yamlTagHandle, "!t!"),
t(yamlTagURI, "tag:http://example.com/"),
t(yamlStreamEnd, nil)])
test "Unknown Directive":
test "Lexing: Unknown Directive":
ensure("%FOO bar baz", [t(yamlUnknownDirective, "%FOO"),
t(yamlUnknownDirectiveParam, "bar"),
t(yamlUnknownDirectiveParam, "baz"),
t(yamlStreamEnd, nil)])
test "Comments after Directives":
test "Lexing: Comments after Directives":
ensure("%YAML 1.2 # version\n# at line start\n # indented\n%FOO",
[t(yamlYamlDirective, nil),
t(yamlVersionPart, "1"),
@ -69,16 +69,16 @@ suite "Lexing":
t(yamlUnknownDirective, "%FOO"),
t(yamlStreamEnd, nil)])
test "Directives End":
test "Lexing: Directives End":
ensure("---", [t(yamlDirectivesEnd, nil),
t(yamlStreamEnd, nil)])
test "Document End":
test "Lexing: Document End":
ensure("...", [t(yamlLineStart, nil),
t(yamlDocumentEnd, nil),
t(yamlStreamEnd, nil)])
test "Directive after Document End":
test "Lexing: Directive after Document End":
ensure("content\n...\n%YAML 1.2",
[t(yamlLineStart, ""),
t(yamlScalar, "content"),
@ -89,63 +89,63 @@ suite "Lexing":
t(yamlVersionPart, "2"),
t(yamlStreamEnd, nil)])
test "Plain Scalar (alphanumeric)":
test "Lexing: Plain Scalar (alphanumeric)":
ensure("abA03rel4", [t(yamlLineStart, ""),
t(yamlScalar, "abA03rel4"),
t(yamlStreamEnd, nil)])
test "Plain Scalar (with spaces)":
test "Lexing: Plain Scalar (with spaces)":
ensure("test content", [t(yamlLineStart, ""),
t(yamlScalar, "test content"),
t(yamlStreamEnd, nil)])
test "Plain Scalar (with special chars)":
test "Lexing: Plain Scalar (with special chars)":
ensure(":test ?content -with #special !chars",
[t(yamlLineStart, nil),
t(yamlScalar, ":test ?content -with #special !chars"),
t(yamlStreamEnd, nil)])
test "Plain Scalar (starting with %)":
test "Lexing: Plain Scalar (starting with %)":
ensure("---\n%test", [t(yamlDirectivesEnd, nil),
t(yamlLineStart, ""),
t(yamlScalar, "%test"),
t(yamlStreamEnd, nil)])
test "Single Quoted Scalar":
test "Lexing: Single Quoted Scalar":
ensure("'? test - content! '", [t(yamlLineStart, ""),
t(yamlScalar, "? test - content! "),
t(yamlStreamEnd, nil)])
test "Single Quoted Scalar (escaped single quote inside)":
test "Lexing: Single Quoted Scalar (escaped single quote inside)":
ensure("'test '' content'", [t(yamlLineStart, ""),
t(yamlScalar, "test ' content"),
t(yamlStreamEnd, nil)])
test "Doubly Quoted Scalar":
test "Lexing: Doubly Quoted Scalar":
ensure("\"test content\"", [t(yamlLineStart, ""),
t(yamlScalar, "test content"),
t(yamlStreamEnd, nil)])
test "Doubly Quoted Scalar (escaping)":
test "Lexing: Doubly Quoted Scalar (escaping)":
ensure(""""\t\\\0\""""", [t(yamlLineStart, ""),
t(yamlScalar, "\t\\\0\""),
t(yamlStreamEnd, nil)])
test "Doubly Quoted Scalar (unicode escaping)":
test "Lexing: Doubly Quoted Scalar (unicode escaping)":
ensure(""""\x42\u4243\U00424344"""",
[t(yamlLineStart, ""),
t(yamlScalar, "\x42" & toUTF8(cast[Rune](0x4243)) &
toUTF8(cast[Rune](0x424344))),
t(yamlStreamEnd, nil)])
test "Block Array":
test "Lexing: Block Array":
ensure("""
- a
- b""", [t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalar, "a"),
t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalar, "b"),
t(yamlStreamEnd, nil)])
test "Block Map with Implicit Keys":
test "Lexing: Block Map with Implicit Keys":
ensure("""
foo: bar
herp: derp""", [t(yamlLineStart, ""), t(yamlScalar, "foo"), t(yamlColon, nil),
@ -153,14 +153,14 @@ herp: derp""", [t(yamlLineStart, ""), t(yamlScalar, "foo"), t(yamlColon, nil),
t(yamlScalar, "herp"), t(yamlColon, nil), t(yamlScalar, "derp"),
t(yamlStreamEnd, nil)])
test "Block Map with Explicit Keys":
test "Lexing: Block Map with Explicit Keys":
ensure("""
? foo
: bar""", [t(yamlLineStart, ""), t(yamlQuestionmark, nil), t(yamlScalar, "foo"),
t(yamlLineStart, ""), t(yamlColon, nil), t(yamlScalar, "bar"),
t(yamlStreamEnd, nil)])
test "Indentation":
test "Lexing: Indentation":
ensure("""
foo:
bar:
@ -174,22 +174,22 @@ foo:
t(yamlLineStart, " "), t(yamlScalar, "herp"), t(yamlColon, nil),
t(yamlScalar, "derp"), t(yamlStreamEnd, nil)])
test "Anchor":
test "Lexing: Anchor":
ensure("foo: &bar", [t(yamlLineStart, ""), t(yamlScalar, "foo"),
t(yamlColon, nil), t(yamlAnchor, "bar"),
t(yamlStreamEnd, nil)])
test "Alias":
test "Lexing: Alias":
ensure("foo: *bar", [t(yamlLineStart, ""), t(yamlScalar, "foo"),
t(yamlColon, nil), t(yamlAlias, "bar"),
t(yamlStreamEnd, nil)])
test "Tag handle":
test "Lexing: Tag handle":
ensure("!t!str tagged", [t(yamlLineStart, ""), t(yamlTagHandle, "!t!"),
t(yamlTagSuffix, "str"),
t(yamlScalar, "tagged"), t(yamlStreamEnd, nil)])
test "Verbatim tag handle":
test "Lexing: Verbatim tag handle":
ensure("!<tag:http://example.com/str> tagged",
[t(yamlLineStart, ""),
t(yamlVerbatimTag, "tag:http://example.com/str"),

91
test/parsing.nim Normal file
View File

@ -0,0 +1,91 @@
import "../src/yaml/sequential"
import unittest
import streams
proc startDoc(): YamlParserEvent =
  ## Builds the expected event for the start of a document.
  # Constructed in one step so the variant discriminator is fixed at
  # creation instead of being reassigned after `new`.
  YamlParserEvent(kind: yamlStartDocument)
proc endDoc(): YamlParserEvent =
  ## Builds the expected event for the end of a document.
  # Constructed in one step so the variant discriminator is fixed at
  # creation instead of being reassigned after `new`.
  YamlParserEvent(kind: yamlEndDocument)
proc scalar(content: string,
            anchor: string = nil, tag: string = nil): YamlParserEvent =
  ## Builds the expected scalar event carrying `content`, with an optional
  ## `anchor` and `tag` (both nil unless given).
  # Constructed in one step so the variant discriminator is fixed at
  # creation instead of being reassigned after `new`.
  YamlParserEvent(kind: yamlScalar,
                  scalarAnchor: anchor,
                  scalarTag: tag,
                  scalarContent: content)
proc startSequence(anchor: string = nil, tag: string = nil): YamlParserEvent =
  ## Builds the expected event for the start of a sequence, with an
  ## optional `anchor` and `tag` (both nil unless given).
  # Constructed in one step so the variant discriminator is fixed at
  # creation instead of being reassigned after `new`.
  YamlParserEvent(kind: yamlStartSequence,
                  objAnchor: anchor,
                  objTag: tag)
proc endSequence(): YamlParserEvent =
  ## Builds the expected event for the end of a sequence.
  # Constructed in one step so the variant discriminator is fixed at
  # creation instead of being reassigned after `new`.
  YamlParserEvent(kind: yamlEndSequence)
proc startMap(anchor: string = nil, tag: string = nil): YamlParserEvent =
  ## Builds the expected event for the start of a map, with an optional
  ## `anchor` and `tag` (both nil unless given).
  # Constructed in one step so the variant discriminator is fixed at
  # creation instead of being reassigned after `new`.
  YamlParserEvent(kind: yamlStartMap,
                  objAnchor: anchor,
                  objTag: tag)
proc endMap(): YamlParserEvent =
  ## Builds the expected event for the end of a map.
  # Constructed in one step so the variant discriminator is fixed at
  # creation instead of being reassigned after `new`.
  YamlParserEvent(kind: yamlEndMap)
proc printDifference(expected, actual: YamlParserEvent) =
  ## Echoes an explanation of how `actual` differs from `expected`.
  ##
  ## When the kinds differ, both kinds are printed; if `actual` is an error
  ## or warning event its description is printed as well.
  ##
  ## When the kinds match, the first differing field of that kind is
  ## reported. Only the first mismatch is printed, checked in this order:
  ##
  ## * scalar: tag, anchor, content
  ## * map/sequence start: tag, anchor
  ## * alias: name
  ## * error/warning: description, then position
  ##
  ## Kinds that carry no compared fields fall through to a generic message.
  if expected.kind != actual.kind:
    echo "expected " & $expected.kind & ", got " & $actual.kind
    if actual.kind == yamlError:
      echo "Error message: " & actual.description
    elif actual.kind == yamlWarning:
      echo "Warning message: " & actual.description
  else:
    case expected.kind
    of yamlScalar:
      if expected.scalarTag != actual.scalarTag:
        echo "[scalar] expected tag " & expected.scalarTag & ", got " &
             actual.scalarTag
      elif expected.scalarAnchor != actual.scalarAnchor:
        echo "[scalar] expected anchor " & expected.scalarAnchor &
             ", got " & actual.scalarAnchor
      elif expected.scalarContent != actual.scalarContent:
        echo "[scalar] expected content \"" & expected.scalarContent &
             "\", got \"" & actual.scalarContent & "\""
      else:
        echo "[scalar] Unknown difference"
    of yamlStartMap, yamlStartSequence:
      # Map and sequence starts share the objTag/objAnchor fields.
      if expected.objTag != actual.objTag:
        echo "[" & $expected.kind & "] expected tag " & expected.objTag &
             ", got " & actual.objTag
      elif expected.objAnchor != actual.objAnchor:
        echo "[" & $expected.kind & "] expected anchor " &
             expected.objAnchor & ", got " & actual.objAnchor
      else:
        echo "[" & $expected.kind & "] Unknown difference"
    of yamlAlias:
      if expected.aliasName != actual.aliasName:
        echo "[alias] expected name " & expected.aliasName & ", got " &
             actual.aliasName
      else:
        echo "[alias] Unknown difference"
    of yamlError, yamlWarning:
      if expected.description != actual.description:
        echo "[" & $expected.kind & "] expected description \"" &
             expected.description & "\", got \"" & actual.description & "\""
      elif expected.line != actual.line or
           expected.column != actual.column:
        echo "[" & $expected.kind & "] expected position " &
             $expected.line & ":" & $expected.column & ", got " &
             $actual.line & ":" & $actual.column
      else:
        echo "[" & $expected.kind & "] Unknown difference"
    else:
      echo "Unknown difference in event kind " & $expected.kind
template ensure(input: string, expected: varargs[YamlParserEvent]) {.dirty.} =
  ## Parses `input` with `events` and checks that the produced events match
  ## `expected` one by one, in order.
  ##
  ## The enclosing test is marked as failed (via `fail()`) when
  ##
  ## * the parser produces more events than were expected,
  ## * an event differs from the expected one at the same position (the
  ##   difference is explained by `printDifference`), or
  ## * the parser stops before all expected events were produced.
  ##
  ## Checking stops at the first problem found. The template is dirty, so
  ## `i` and `mismatch` are declared in the calling scope.
  var
    i = 0
    mismatch = false
  for token in events(newStringStream(input)):
    if i >= expected.len:
      echo "received more tokens than expected (next token = ",
           token.kind, ")"
      fail()
      mismatch = true
      break
    if token != expected[i]:
      echo "at token #" & $i & ":"
      printDifference(expected[i], token)
      fail()
      mismatch = true
      break
    i.inc()
  # A stream that ends early must not pass silently. Skipped when a failure
  # was already reported so each problem is printed only once.
  if not mismatch and i < expected.len:
    echo "received fewer tokens than expected (first missing token = ",
         expected[i].kind, ")"
    fail()
suite "Parsing":
  # Each case feeds a small YAML text to the parser through `ensure` and
  # lists the exact event sequence that must come out.

  test "Parsing: Simple Scalar":
    # A lone scalar, wrapped in document start/end events.
    ensure("Scalar",
           startDoc(),
           scalar("Scalar"),
           endDoc())

  test "Parsing: Simple Sequence":
    # A block sequence with a single item.
    ensure("- item",
           startDoc(),
           startSequence(),
           scalar("item"),
           endSequence(),
           endDoc())

  test "Parsing: Simple Map":
    # A block map with one implicit key and its value.
    ensure("key: value",
           startDoc(),
           startMap(),
           scalar("key"),
           scalar("value"),
           endMap(),
           endDoc())

View File

@ -1 +1 @@
import lexing
import lexing, parsing