From f876c845b7b9f21bc7359a8e835790a701a78d07 Mon Sep 17 00:00:00 2001 From: Felix Krause Date: Sun, 29 Nov 2015 22:27:05 +0100 Subject: [PATCH] Own token kinds for every special char, more tests --- src/yaml/private/lexer.nim | 106 ++++++++++++++++++++++--------------- test/lexing.nim | 80 ++++++++++++++++++++++------ 2 files changed, 125 insertions(+), 61 deletions(-) diff --git a/src/yaml/private/lexer.nim b/src/yaml/private/lexer.nim index 057025d..cc1a945 100644 --- a/src/yaml/private/lexer.nim +++ b/src/yaml/private/lexer.nim @@ -19,7 +19,11 @@ type # tokens in directives and content yamlTagHandle, yamlComment, # from here on tokens only in content - yamlLineStart, yamlControlChar, + yamlLineStart, + # control characters + yamlColon, yamlDash, yamlQuestionmark, yamlComma, yamlOpeningBrace, + yamlOpeningBracket, yamlClosingBrace, yamlClosingBracket, yamlPipe, + yamlGreater, # block scalar header yamlLiteralScalar, yamlFoldedScalar, yamlBlockIndentationIndicator, yamlBlockChompingIndicator, @@ -40,7 +44,7 @@ type YamlLexerState = enum # initial states (not started reading any token) ylInitial, ylInitialSpaces, ylInitialUnknown, ylInitialContent, - ylDefineTagHandleInitial, ylDefineTagURIInitial, ylBlock, ylFlow, + ylDefineTagHandleInitial, ylDefineTagURIInitial, ylInitialInLine, ylLineEnd, ylDirectiveLineEnd, # directive reading states ylDirective, ylDefineTagHandle, ylDefineTagURI, ylMajorVersion, @@ -145,10 +149,6 @@ template yieldError(message: string) {.dirty.} = yield (kind: yamlError, position: position) my.content = "" -template yieldChar(c: char) {.dirty.} = - my.content = "" & c - yield (kind: yamlControlChar, position: position) - template handleCR() {.dirty.} = my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) + my.charlen - my.charoffset - 1 @@ -208,8 +208,8 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = of ' ', '\t': my.content.add(c) of '#': - state = ylDirectiveComment my.content = "" + state = ylDirectiveComment of EndOfFile, '\r', '\x0A': state = ylDirectiveLineEnd continue @@ -222,6 +222,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = state = ylDashes continue of '.': + yieldToken(yamlLineStart) state = ylDots continue else: @@ -235,11 +236,17 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = case my.content.len of 3: yieldToken(yamlDirectivesEnd) - state = ylBlock + state = ylInitialInLine of 1: + my.content = "" + yieldToken(yamlLineStart) lastSpecialChar = '-' - state = ylBlock + state = ylInitialInLine else: + let tmp = my.content + my.content = "" + yieldToken(yamlLineStart) + my.content = tmp state = ylPlainScalar continue else: @@ -407,7 +414,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = trailingSpace = "" else: trailingSpace = "" - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine continue of ylPlainScalar: @@ -427,7 +434,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = else: my.content.add(c) of '[', ']', '{', '}': yieldToken(yamlScalar) - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine continue else: my.content.add(c) @@ -437,7 +444,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = case c of ' ', '\t', EndOfFile, '\r', '\x0A': yieldToken(yamlScalar) - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine else: my.content.add(trailingSpace) my.content.add(lastSpecialChar) @@ -467,7 +474,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = of '[', ']', '{', '}': yieldToken(yamlScalar) trailingSpace = "" - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine continue else: my.content.add(trailingSpace) @@ -475,7 +482,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = trailingSpace = "" state = ylPlainScalar - of ylFlow, ylBlock: + of ylInitialInLine: if lastSpecialChar != '\0': case c of ' ', '\t', '\r', '\x0A', EndOfFile: @@ -483,10 +490,16 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = of '#': my.content = "#" state = ylComment - lastSpecialChar = '\0' + of ':': + yieldToken(yamlColon) + of '?': + yieldToken(yamlQuestionmark) + of '-': + yieldToken(yamlDash) else: - yieldChar(lastSpecialChar) - lastSpecialChar = '\0' + yieldError("Unexpected special char: \"" & + lastSpecialChar & "\"") + lastSpecialChar = '\0' elif lastSpecialChar == '!': case c of '<': @@ -507,32 +520,34 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = state = ylLineEnd continue of ',': - if state == ylFlow: - yieldChar(c) + if flowDepth > 0: + yieldToken(yamlComma) else: my.content = "" & c state = ylPlainScalar - of '[', '{': + of '[': inc(flowDepth) - yieldChar(c) - of ']', '}': - if state == ylBlock: - yieldError(c & " encountered while in block mode") - else: + yieldToken(yamlOpeningBracket) + of '{': + inc(flowDepth) + yieldToken(yamlOpeningBrace) + of ']': + yieldToken(yamlClosingBracket) + if flowDepth > 0: + inc(flowDepth, -1) + of '}': + yieldToken(yamlClosingBrace) + if flowDepth > 0: inc(flowDepth, -1) - yieldChar(c) - if flowDepth == 0: - state = ylBlock of '#': lastSpecialChar = '#' of '"': - my.content = "" state = ylDoublyQuotedScalar of '\'': - my.content = "" state = ylSingleQuotedScalar of '!': - lastSpecialChar = '!' + my.content.add(c) + state = ylTagHandle of '&': yieldError("TODO: anchors") of '*': @@ -540,15 +555,18 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = of ' ': discard of '-': - if state == ylBlock: + if flowDepth == 0: lastSpecialChar = '-' else: my.content = "" & c state = ylPlainScalar of '?', ':': lastSpecialChar = c - of '|', '>': - yieldChar(c) + of '|': + yieldToken(yamlPipe) + state = ylBlockScalarHeader + of '>': + yieldToken(yamlGreater) state = ylBlockScalarHeader of '\t': discard @@ -587,7 +605,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = else: state = ylBlockScalar continue - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine continue of ylTagHandle: case c @@ -603,12 +621,12 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = yieldToken(yamlTagHandle) my.content = suffix yieldToken(yamlTagSuffix) - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine continue else: yieldError("Invalid character in tag handle: " & c) my.content = "" - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine of ylTagSuffix: case c of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@', @@ -616,11 +634,11 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = my.content.add(c) of ' ', '\t', EndOfFile, '\r', '\x0A': yieldToken(yamlTagSuffix) - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine continue else: yieldError("Invalid character in tag suffix: " & c) - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine of ylVerbatimTag: case c of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@', @@ -628,7 +646,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = my.content.add(c) of '>': yieldToken(yamlVerbatimTag) - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine of EndOfFile, '\r', '\x0A': yieldError("Unfinished verbatim tag") state = ylLineEnd @@ -636,7 +654,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = else: yieldError("Invalid character in tag URI: " & c) my.content = "" - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine of ylDirective: case c of ' ', '\t', '\r', '\x0A', EndOfFile: @@ -712,7 +730,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = state = ylDefineTagHandle else: yieldError("Unexpected character in %TAG directive: " & c) - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine of ylDefineTagHandle: case c of '!': @@ -727,7 +745,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = continue else: yieldError("Unexpected char in %TAG directive: " & c) - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine of ylDefineTagURIInitial: case c of '\t', ' ': @@ -753,7 +771,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerEvent = continue else: yieldError("Invalid URI character: " & c) - state = if flowDepth > 0: ylFlow else: ylBlock + state = ylInitialInLine continue of ylBlockScalarHeader: case c diff --git a/test/lexing.nim b/test/lexing.nim index e23c83e..f23ae87 100644 --- a/test/lexing.nim +++ b/test/lexing.nim @@ -11,6 +11,11 @@ template ensure(input: string, expected: openarray[BasicLexerEvent]) = lex: YamlLexer lex.open(newStringStream(input)) for token in lex.tokens: + if i >= expected.len: + echo "received more tokens than expected (next token = ", + token.kind, ")" + fail() + break if token.kind != expected[i].kind: if token.kind == yamlError: echo "got lexer error: " & lex.content @@ -26,31 +31,34 @@ template ensure(input: string, expected: openarray[BasicLexerEvent]) = fail() break inc(i) + if i < expected.len: + echo "received less tokens than expected (first missing = ", + expected[i].kind, ")" proc t(kind: YamlLexerEventKind, content: string): BasicLexerEvent = (kind: kind, content: content) suite "Lexing": - test "YAML directive": + test "YAML Directive": ensure("%YAML 1.2", [t(yamlYamlDirective, nil), t(yamlMajorVersion, "1"), t(yamlMinorVersion, "2"), t(yamlStreamEnd, nil)]) - test "TAG directive": + test "TAG Directive": ensure("%TAG !t! tag:http://example.com/", [t(yamlTagDirective, nil), t(yamlTagHandle, "!t!"), t(yamlTagURI, "tag:http://example.com/"), t(yamlStreamEnd, nil)]) - test "Unknown directive": + test "Unknown Directive": ensure("%FOO bar baz", [t(yamlUnknownDirective, "%FOO"), t(yamlUnknownDirectiveParam, "bar"), t(yamlUnknownDirectiveParam, "baz"), t(yamlStreamEnd, nil)]) - test "Comments after directives": + test "Comments after Directives": ensure("%YAML 1.2 # version\n# at line start\n # indented\n%FOO", [t(yamlYamlDirective, nil), t(yamlMajorVersion, "1"), @@ -61,18 +69,20 @@ suite "Lexing": t(yamlUnknownDirective, "%FOO"), t(yamlStreamEnd, nil)]) - test "Directives end": + test "Directives End": ensure("---", [t(yamlDirectivesEnd, nil), t(yamlStreamEnd, nil)]) - test "Document end": - ensure("...", [t(yamlDocumentEnd, nil), + test "Document End": + ensure("...", [t(yamlLineStart, nil), + t(yamlDocumentEnd, nil), t(yamlStreamEnd, nil)]) - test "Directive after document end": + test "Directive after Document End": ensure("content\n...\n%YAML 1.2", - [t(yamlLineStart, nil), + [t(yamlLineStart, ""), t(yamlScalar, "content"), + t(yamlLineStart, ""), t(yamlDocumentEnd, nil), t(yamlYamlDirective, nil), t(yamlMajorVersion, "1"), @@ -80,12 +90,12 @@ suite "Lexing": t(yamlStreamEnd, nil)]) test "Plain Scalar (alphanumeric)": - ensure("abA03rel4", [t(yamlLineStart, nil), + ensure("abA03rel4", [t(yamlLineStart, ""), t(yamlScalar, "abA03rel4"), t(yamlStreamEnd, nil)]) test "Plain Scalar (with spaces)": - ensure("test content", [t(yamlLineStart, nil), + ensure("test content", [t(yamlLineStart, ""), t(yamlScalar, "test content"), t(yamlStreamEnd, nil)]) @@ -102,28 +112,64 @@ suite "Lexing": t(yamlStreamEnd, nil)]) test "Single Quoted Scalar": - ensure("'? test - content! '", [t(yamlLineStart, nil), + ensure("'? test - content! '", [t(yamlLineStart, ""), t(yamlScalar, "? test - content! "), t(yamlStreamEnd, nil)]) test "Single Quoted Scalar (escaped single quote inside)": - ensure("'test '' content'", [t(yamlLineStart, nil), + ensure("'test '' content'", [t(yamlLineStart, ""), t(yamlScalar, "test ' content"), t(yamlStreamEnd, nil)]) test "Doubly Quoted Scalar": - ensure("\"test content\"", [t(yamlLineStart, nil), + ensure("\"test content\"", [t(yamlLineStart, ""), t(yamlScalar, "test content"), t(yamlStreamEnd, nil)]) test "Doubly Quoted Scalar (escaping)": - ensure(""""\t\\\0\""""", [t(yamlLineStart, nil), + ensure(""""\t\\\0\""""", [t(yamlLineStart, ""), t(yamlScalar, "\t\\\0\""), t(yamlStreamEnd, nil)]) test "Doubly Quoted Scalar (unicode escaping)": ensure(""""\x42\u4243\U00424344"""", - [t(yamlLineStart, nil), + [t(yamlLineStart, ""), t(yamlScalar, "\x42" & toUTF8(cast[Rune](0x4243)) & toUTF8(cast[Rune](0x424344))), - t(yamlStreamEnd, nil)]) \ No newline at end of file + t(yamlStreamEnd, nil)]) + + test "Block Array": + ensure(""" +- a +- b""", [t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalar, "a"), + t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalar, "b"), + t(yamlStreamEnd, nil)]) + + test "Block Map with Implicit Keys": + ensure(""" +foo: bar +herp: derp""", [t(yamlLineStart, ""), t(yamlScalar, "foo"), t(yamlColon, nil), + t(yamlScalar, "bar"), t(yamlLineStart, ""), + t(yamlScalar, "herp"), t(yamlColon, nil), t(yamlScalar, "derp"), + t(yamlStreamEnd, nil)]) + + test "Block Map with Explicit Keys": + ensure(""" +? foo +: bar""", [t(yamlLineStart, ""), t(yamlQuestionmark, nil), t(yamlScalar, "foo"), + t(yamlLineStart, ""), t(yamlColon, nil), t(yamlScalar, "bar"), + t(yamlStreamEnd, nil)]) + + test "Indentation": + ensure(""" +foo: + bar: + - baz + - biz + herp: derp""", + [t(yamlLineStart, ""), t(yamlScalar, "foo"), t(yamlColon, nil), + t(yamlLineStart, " "), t(yamlScalar, "bar"), t(yamlColon, nil), + t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalar, "baz"), + t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalar, "biz"), + t(yamlLineStart, " "), t(yamlScalar, "herp"), t(yamlColon, nil), + t(yamlScalar, "derp"), t(yamlStreamEnd, nil)]) \ No newline at end of file