Own token kinds for every special char, more tests

This commit is contained in:
Felix Krause 2015-11-29 22:27:05 +01:00
parent 71456c6bed
commit f876c845b7
2 changed files with 125 additions and 61 deletions

View File

@ -19,7 +19,11 @@ type
# tokens in directives and content
yamlTagHandle, yamlComment,
# from here on tokens only in content
yamlLineStart, yamlControlChar,
yamlLineStart,
# control characters
yamlColon, yamlDash, yamlQuestionmark, yamlComma, yamlOpeningBrace,
yamlOpeningBracket, yamlClosingBrace, yamlClosingBracket, yamlPipe,
yamlGreater,
# block scalar header
yamlLiteralScalar, yamlFoldedScalar,
yamlBlockIndentationIndicator, yamlBlockChompingIndicator,
@ -40,7 +44,7 @@ type
YamlLexerState = enum
# initial states (not started reading any token)
ylInitial, ylInitialSpaces, ylInitialUnknown, ylInitialContent,
ylDefineTagHandleInitial, ylDefineTagURIInitial, ylBlock, ylFlow,
ylDefineTagHandleInitial, ylDefineTagURIInitial, ylInitialInLine,
ylLineEnd, ylDirectiveLineEnd,
# directive reading states
ylDirective, ylDefineTagHandle, ylDefineTagURI, ylMajorVersion,
@ -145,10 +149,6 @@ template yieldError(message: string) {.dirty.} =
yield (kind: yamlError, position: position)
my.content = ""
template yieldChar(c: char) {.dirty.} =
my.content = "" & c
yield (kind: yamlControlChar, position: position)
template handleCR() {.dirty.} =
my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) + my.charlen -
my.charoffset - 1
@ -208,8 +208,8 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
of ' ', '\t':
my.content.add(c)
of '#':
state = ylDirectiveComment
my.content = ""
state = ylDirectiveComment
of EndOfFile, '\r', '\x0A':
state = ylDirectiveLineEnd
continue
@ -222,6 +222,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
state = ylDashes
continue
of '.':
yieldToken(yamlLineStart)
state = ylDots
continue
else:
@ -235,11 +236,17 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
case my.content.len
of 3:
yieldToken(yamlDirectivesEnd)
state = ylBlock
state = ylInitialInLine
of 1:
my.content = ""
yieldToken(yamlLineStart)
lastSpecialChar = '-'
state = ylBlock
state = ylInitialInLine
else:
let tmp = my.content
my.content = ""
yieldToken(yamlLineStart)
my.content = tmp
state = ylPlainScalar
continue
else:
@ -407,7 +414,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
trailingSpace = ""
else:
trailingSpace = ""
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
continue
of ylPlainScalar:
@ -427,7 +434,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
else: my.content.add(c)
of '[', ']', '{', '}':
yieldToken(yamlScalar)
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
continue
else:
my.content.add(c)
@ -437,7 +444,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
case c
of ' ', '\t', EndOfFile, '\r', '\x0A':
yieldToken(yamlScalar)
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
else:
my.content.add(trailingSpace)
my.content.add(lastSpecialChar)
@ -467,7 +474,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
of '[', ']', '{', '}':
yieldToken(yamlScalar)
trailingSpace = ""
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
continue
else:
my.content.add(trailingSpace)
@ -475,7 +482,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
trailingSpace = ""
state = ylPlainScalar
of ylFlow, ylBlock:
of ylInitialInLine:
if lastSpecialChar != '\0':
case c
of ' ', '\t', '\r', '\x0A', EndOfFile:
@ -483,9 +490,15 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
of '#':
my.content = "#"
state = ylComment
lastSpecialChar = '\0'
of ':':
yieldToken(yamlColon)
of '?':
yieldToken(yamlQuestionmark)
of '-':
yieldToken(yamlDash)
else:
yieldChar(lastSpecialChar)
yieldError("Unexpected special char: \"" &
lastSpecialChar & "\"")
lastSpecialChar = '\0'
elif lastSpecialChar == '!':
case c
@ -507,32 +520,34 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
state = ylLineEnd
continue
of ',':
if state == ylFlow:
yieldChar(c)
if flowDepth > 0:
yieldToken(yamlComma)
else:
my.content = "" & c
state = ylPlainScalar
of '[', '{':
of '[':
inc(flowDepth)
yieldChar(c)
of ']', '}':
if state == ylBlock:
yieldError(c & " encountered while in block mode")
else:
yieldToken(yamlOpeningBracket)
of '{':
inc(flowDepth)
yieldToken(yamlOpeningBrace)
of ']':
yieldToken(yamlClosingBracket)
if flowDepth > 0:
inc(flowDepth, -1)
of '}':
yieldToken(yamlClosingBrace)
if flowDepth > 0:
inc(flowDepth, -1)
yieldChar(c)
if flowDepth == 0:
state = ylBlock
of '#':
lastSpecialChar = '#'
of '"':
my.content = ""
state = ylDoublyQuotedScalar
of '\'':
my.content = ""
state = ylSingleQuotedScalar
of '!':
lastSpecialChar = '!'
my.content.add(c)
state = ylTagHandle
of '&':
yieldError("TODO: anchors")
of '*':
@ -540,15 +555,18 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
of ' ':
discard
of '-':
if state == ylBlock:
if flowDepth == 0:
lastSpecialChar = '-'
else:
my.content = "" & c
state = ylPlainScalar
of '?', ':':
lastSpecialChar = c
of '|', '>':
yieldChar(c)
of '|':
yieldToken(yamlPipe)
state = ylBlockScalarHeader
of '>':
yieldToken(yamlGreater)
state = ylBlockScalarHeader
of '\t':
discard
@ -587,7 +605,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
else:
state = ylBlockScalar
continue
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
continue
of ylTagHandle:
case c
@ -603,12 +621,12 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
yieldToken(yamlTagHandle)
my.content = suffix
yieldToken(yamlTagSuffix)
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
continue
else:
yieldError("Invalid character in tag handle: " & c)
my.content = ""
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
of ylTagSuffix:
case c
of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@',
@ -616,11 +634,11 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
my.content.add(c)
of ' ', '\t', EndOfFile, '\r', '\x0A':
yieldToken(yamlTagSuffix)
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
continue
else:
yieldError("Invalid character in tag suffix: " & c)
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
of ylVerbatimTag:
case c
of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@',
@ -628,7 +646,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
my.content.add(c)
of '>':
yieldToken(yamlVerbatimTag)
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
of EndOfFile, '\r', '\x0A':
yieldError("Unfinished verbatim tag")
state = ylLineEnd
@ -636,7 +654,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
else:
yieldError("Invalid character in tag URI: " & c)
my.content = ""
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
of ylDirective:
case c
of ' ', '\t', '\r', '\x0A', EndOfFile:
@ -712,7 +730,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
state = ylDefineTagHandle
else:
yieldError("Unexpected character in %TAG directive: " & c)
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
of ylDefineTagHandle:
case c
of '!':
@ -727,7 +745,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
continue
else:
yieldError("Unexpected char in %TAG directive: " & c)
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
of ylDefineTagURIInitial:
case c
of '\t', ' ':
@ -753,7 +771,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent =
continue
else:
yieldError("Invalid URI character: " & c)
state = if flowDepth > 0: ylFlow else: ylBlock
state = ylInitialInLine
continue
of ylBlockScalarHeader:
case c

View File

@ -11,6 +11,11 @@ template ensure(input: string, expected: openarray[BasicLexerEvent]) =
lex: YamlLexer
lex.open(newStringStream(input))
for token in lex.tokens:
if i >= expected.len:
echo "received more tokens than expected (next token = ",
token.kind, ")"
fail()
break
if token.kind != expected[i].kind:
if token.kind == yamlError:
echo "got lexer error: " & lex.content
@ -26,31 +31,34 @@ template ensure(input: string, expected: openarray[BasicLexerEvent]) =
fail()
break
inc(i)
if i < expected.len:
echo "received less tokens than expected (first missing = ",
expected[i].kind, ")"
proc t(kind: YamlLexerEventKind, content: string): BasicLexerEvent =
  ## Shorthand used by the tests below to build one expected lexer event
  ## from a token kind and the content that token should carry.
  result.kind = kind
  result.content = content
suite "Lexing":
test "YAML directive":
test "YAML Directive":
ensure("%YAML 1.2", [t(yamlYamlDirective, nil),
t(yamlMajorVersion, "1"),
t(yamlMinorVersion, "2"),
t(yamlStreamEnd, nil)])
test "TAG directive":
test "TAG Directive":
ensure("%TAG !t! tag:http://example.com/",
[t(yamlTagDirective, nil),
t(yamlTagHandle, "!t!"),
t(yamlTagURI, "tag:http://example.com/"),
t(yamlStreamEnd, nil)])
test "Unknown directive":
test "Unknown Directive":
ensure("%FOO bar baz", [t(yamlUnknownDirective, "%FOO"),
t(yamlUnknownDirectiveParam, "bar"),
t(yamlUnknownDirectiveParam, "baz"),
t(yamlStreamEnd, nil)])
test "Comments after directives":
test "Comments after Directives":
ensure("%YAML 1.2 # version\n# at line start\n # indented\n%FOO",
[t(yamlYamlDirective, nil),
t(yamlMajorVersion, "1"),
@ -61,18 +69,20 @@ suite "Lexing":
t(yamlUnknownDirective, "%FOO"),
t(yamlStreamEnd, nil)])
test "Directives end":
test "Directives End":
ensure("---", [t(yamlDirectivesEnd, nil),
t(yamlStreamEnd, nil)])
test "Document end":
ensure("...", [t(yamlDocumentEnd, nil),
test "Document End":
ensure("...", [t(yamlLineStart, nil),
t(yamlDocumentEnd, nil),
t(yamlStreamEnd, nil)])
test "Directive after document end":
test "Directive after Document End":
ensure("content\n...\n%YAML 1.2",
[t(yamlLineStart, nil),
[t(yamlLineStart, ""),
t(yamlScalar, "content"),
t(yamlLineStart, ""),
t(yamlDocumentEnd, nil),
t(yamlYamlDirective, nil),
t(yamlMajorVersion, "1"),
@ -80,12 +90,12 @@ suite "Lexing":
t(yamlStreamEnd, nil)])
test "Plain Scalar (alphanumeric)":
ensure("abA03rel4", [t(yamlLineStart, nil),
ensure("abA03rel4", [t(yamlLineStart, ""),
t(yamlScalar, "abA03rel4"),
t(yamlStreamEnd, nil)])
test "Plain Scalar (with spaces)":
ensure("test content", [t(yamlLineStart, nil),
ensure("test content", [t(yamlLineStart, ""),
t(yamlScalar, "test content"),
t(yamlStreamEnd, nil)])
@ -102,28 +112,64 @@ suite "Lexing":
t(yamlStreamEnd, nil)])
test "Single Quoted Scalar":
ensure("'? test - content! '", [t(yamlLineStart, nil),
ensure("'? test - content! '", [t(yamlLineStart, ""),
t(yamlScalar, "? test - content! "),
t(yamlStreamEnd, nil)])
test "Single Quoted Scalar (escaped single quote inside)":
ensure("'test '' content'", [t(yamlLineStart, nil),
ensure("'test '' content'", [t(yamlLineStart, ""),
t(yamlScalar, "test ' content"),
t(yamlStreamEnd, nil)])
test "Doubly Quoted Scalar":
ensure("\"test content\"", [t(yamlLineStart, nil),
ensure("\"test content\"", [t(yamlLineStart, ""),
t(yamlScalar, "test content"),
t(yamlStreamEnd, nil)])
test "Doubly Quoted Scalar (escaping)":
ensure(""""\t\\\0\""""", [t(yamlLineStart, nil),
ensure(""""\t\\\0\""""", [t(yamlLineStart, ""),
t(yamlScalar, "\t\\\0\""),
t(yamlStreamEnd, nil)])
test "Doubly Quoted Scalar (unicode escaping)":
ensure(""""\x42\u4243\U00424344"""",
[t(yamlLineStart, nil),
[t(yamlLineStart, ""),
t(yamlScalar, "\x42" & toUTF8(cast[Rune](0x4243)) &
toUTF8(cast[Rune](0x424344))),
t(yamlStreamEnd, nil)])
# Lexes a two-item block sequence ("- a" / "- b"). Each line is expected to
# produce yamlLineStart, yamlDash and a yamlScalar holding the item text,
# followed by a single yamlStreamEnd once the input is exhausted.
test "Block Array":
ensure("""
- a
- b""", [t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalar, "a"),
t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalar, "b"),
t(yamlStreamEnd, nil)])
# Lexes a two-entry block mapping written as "key: value". Each line is
# expected to produce yamlLineStart, the key as yamlScalar, yamlColon and the
# value as yamlScalar; the stream is closed by yamlStreamEnd.
test "Block Map with Implicit Keys":
ensure("""
foo: bar
herp: derp""", [t(yamlLineStart, ""), t(yamlScalar, "foo"), t(yamlColon, nil),
t(yamlScalar, "bar"), t(yamlLineStart, ""),
t(yamlScalar, "herp"), t(yamlColon, nil), t(yamlScalar, "derp"),
t(yamlStreamEnd, nil)])
# Lexes a mapping entry written with the explicit "? key" / ": value" form.
# The first line is expected to yield yamlLineStart, yamlQuestionmark and the
# key scalar; the second yamlLineStart, yamlColon and the value scalar.
test "Block Map with Explicit Keys":
ensure("""
? foo
: bar""", [t(yamlLineStart, ""), t(yamlQuestionmark, nil), t(yamlScalar, "foo"),
t(yamlLineStart, ""), t(yamlColon, nil), t(yamlScalar, "bar"),
t(yamlStreamEnd, nil)])
# Lexes a nested structure (a mapping containing a mapping, a sequence and a
# further key/value pair) and checks the token stream line by line. Every
# line is expected to start with a yamlLineStart token.
# NOTE(review): the yamlLineStart content presumably carries the line's
# leading whitespace -- confirm that the indentation widths in the input
# match the expected strings below.
test "Indentation":
ensure("""
foo:
bar:
- baz
- biz
herp: derp""",
[t(yamlLineStart, ""), t(yamlScalar, "foo"), t(yamlColon, nil),
t(yamlLineStart, " "), t(yamlScalar, "bar"), t(yamlColon, nil),
t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalar, "baz"),
t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalar, "biz"),
t(yamlLineStart, " "), t(yamlScalar, "herp"), t(yamlColon, nil),
t(yamlScalar, "derp"), t(yamlStreamEnd, nil)])