From e307cbbad568c64c7580d2890a1dc86b81279342 Mon Sep 17 00:00:00 2001 From: Felix Krause Date: Wed, 23 Dec 2015 18:12:51 +0100 Subject: [PATCH] Pipe type hints through parser. Fixed bugs. --- src/private/lexer.nim | 7 ++--- src/private/sequential.nim | 63 ++++++++++++++++++++++++++------------ src/yaml.nim | 4 ++- test/lexing.nim | 7 +++-- test/parsing.nim | 59 ++++++++++++++++++++++------------- 5 files changed, 90 insertions(+), 50 deletions(-) diff --git a/src/private/lexer.nim b/src/private/lexer.nim index 1d1809a..19854f7 100644 --- a/src/private/lexer.nim +++ b/src/private/lexer.nim @@ -164,10 +164,10 @@ template yieldScalarPart() {.dirty.} = of ythDecimal, ythExponent: my.typeHint = yTypeFloat else: - my.typeHint = yTypeString + my.typeHint = yTypeUnknown when defined(yamlDebug): - echo "Lexer token: tScalarPart(\"", my.content, "\".", my.typeHint, + echo "Lexer token: tScalarPart(\"", my.content, "\".", typeHintState, ")" yield tScalarPart my.content = "" @@ -324,7 +324,6 @@ template advanceTypeHint(ch: char) {.dirty.} = else: typeHintState = ythNone state = ylPlainScalarNone - iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} = var @@ -734,11 +733,11 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} = lastSpecialChar = '\0' else: my.content.add(lastSpecialChar) + advanceTypeHint(lastSpecialChar) lastSpecialChar = '\0' my.column = curPos - 1 state = ylPlainScalar typeHintState = ythInitial - advanceTypeHint(lastSpecialChar) continue case c of '\r', '\x0A', EndOfFile: diff --git a/src/private/sequential.nim b/src/private/sequential.nim index 40905c5..e125b5f 100644 --- a/src/private/sequential.nim +++ b/src/private/sequential.nim @@ -91,7 +91,8 @@ proc `==`*(left: YamlParserEvent, right: YamlParserEvent): bool = of yamlScalar: result = left.scalarAnchor == right.scalarAnchor and left.scalarTag == right.scalarTag and - left.scalarContent == right.scalarContent + left.scalarContent == right.scalarContent and + left.scalarType == right.scalarType of yamlAlias: result = left.aliasTarget == right.aliasTarget of yamlError, yamlWarning: @@ -141,13 +142,20 @@ proc resolveTag(parser: YamlSequentialParser, tag: var string, parser.tags[tag] = result tag = "" -template yieldScalar(content: string = "", quoted: bool = false) {.dirty.} = +template yieldScalar(content: string, typeHint: YamlTypeHint, + quoted: bool = false) {.dirty.} = + when defined(yamlDebug): + echo "Parser token [mode=", level.mode, ", state=", state, "]: ", + "scalar[\"", content, "\", type=", typeHint, "]" yield YamlParserEvent(kind: yamlScalar, scalarAnchor: resolveAnchor(parser, anchor), scalarTag: resolveTag(parser, tag, quoted), - scalarContent: content) + scalarContent: content, + scalarType: typeHint) template yieldStart(k: YamlParserEventKind) {.dirty.} = + when defined(yamlDebug): + echo "Parser token [mode=", level.mode, ", state=", state, "]: ", k yield YamlParserEvent(kind: k, objAnchor: resolveAnchor(parser, anchor), objTag: resolveTag(parser, tag)) @@ -161,20 +169,24 @@ template yieldDocumentEnd() {.dirty.} = template closeLevel(lvl: DocumentLevel) {.dirty.} = case lvl.mode of mExplicitBlockMapKey, mFlowMapKey: - yieldScalar("") + yieldScalar("", yTypeUnknown) yield YamlParserEvent(kind: yamlEndMap) of mImplicitBlockMapKey, mBlockMapValue, mFlowMapValue: yield YamlParserEvent(kind: yamlEndMap) of mBlockSequenceItem, mFlowSequenceItem: yield YamlParserEvent(kind: yamlEndSequence) of mScalar: + when defined(yamlDebug): + echo "Parser token [mode=", level.mode, ", state=", state, "]: ", + "scalar[\"", scalarCache, "\", type=", scalarCacheType, "]" yield YamlParserEvent(kind: yamlScalar, scalarAnchor: resolveAnchor(parser, anchor), scalarTag: resolveTag(parser, tag), - scalarContent: scalarCache) + scalarContent: scalarCache, + scalarType: scalarCacheType) else: - yieldScalar() + yieldScalar("", yTypeUnknown) proc mustLeaveLevel(curCol: int, ancestry: seq[DocumentLevel]): bool = if ancestry.len == 0: @@ -220,7 +232,7 @@ template handleBlockIndicator(expected, possible: openarray[DocumentLevelMode], # `in` does not work if possible is [], so we have to check for that when possible.len > 0: if level.mode in possible: - yieldScalar("") + yieldScalar("", yTypeUnknown) level.mode = next ancestry.add(level) level = DocumentLevel(mode: mUnknown, indicatorColumn: -1, @@ -246,7 +258,7 @@ template handleBlockIndicator(expected, possible: openarray[DocumentLevelMode], yieldStart(entering) anchor = cachedAnchor tag = cachedTag - yieldScalar("") + yieldScalar("", yTypeUnknown) else: yieldStart(entering) ancestry.add(level) @@ -256,6 +268,7 @@ template handleBlockIndicator(expected, possible: openarray[DocumentLevelMode], template startPlainScalar() {.dirty.} = level.mode = mScalar scalarCache = lex.content + scalarCacheType = lex.typeHint state = ypBlockAfterScalar template handleTagHandle() {.dirty.} = @@ -301,6 +314,7 @@ proc parse*(parser: YamlSequentialParser, tag: string = "" anchor: string = "" scalarCache: string = nil + scalarCacheType: YamlTypeHint scalarIndentation: int scalarCacheIsQuoted: bool = false aliasCache = anchorNone @@ -426,15 +440,17 @@ proc parse*(parser: YamlSequentialParser, level.indentationColumn = lex.column of mImplicitBlockMapKey: scalarCache = lex.content + scalarCacheType = lex.typeHint scalarCacheIsQuoted = false scalarIndentation = lex.column of mBlockMapValue: scalarCache = lex.content + scalarCacheType = lex.typeHint scalarCacheIsQuoted = false scalarIndentation = lex.column level.mode = mImplicitBlockMapKey of mExplicitBlockMapKey: - yieldScalar() + yieldScalar("", yTypeUnknown) level.mode = mBlockMapValue continue else: @@ -445,6 +461,7 @@ proc parse*(parser: YamlSequentialParser, case level.mode of mUnknown, mImplicitBlockMapKey: scalarCache = lex.content + scalarCacheType = yTypeString scalarCacheIsQuoted = true scalarIndentation = lex.column state = ypBlockAfterScalar @@ -489,6 +506,7 @@ proc parse*(parser: YamlSequentialParser, state = ypBlockLineStart continue scalarCache &= " " & lex.content + scalarCacheType = yTypeUnknown state = ypBlockLineEnd of tLineStart: discard @@ -527,14 +545,15 @@ proc parse*(parser: YamlSequentialParser, ancestry.add(level) level = DocumentLevel(mode: mUnknown, indicatorColumn: -1, indentationColumn: -1) - yieldScalar(scalarCache, scalarCacheIsQuoted) + yieldScalar(scalarCache, scalarCacheType, scalarCacheIsQuoted) scalarCache = nil state = ypBlockAfterColon of tLineStart: if level.mode == mImplicitBlockMapKey: yieldError("Missing colon after implicit map key") if level.mode != mScalar: - yieldScalar(scalarCache, scalarCacheIsQuoted) + yieldScalar(scalarCache, scalarCacheType, + scalarCacheIsQuoted) scalarCache = nil if ancestry.len > 0: level = ancestry.pop() @@ -543,7 +562,7 @@ proc parse*(parser: YamlSequentialParser, else: state = ypBlockMultilineScalar of tStreamEnd: - yieldScalar(scalarCache, scalarCacheIsQuoted) + yieldScalar(scalarCache, scalarCacheType, scalarCacheIsQuoted) scalarCache = nil if ancestry.len > 0: level = ancestry.pop() @@ -650,7 +669,7 @@ proc parse*(parser: YamlSequentialParser, of ypBlockAfterColon: case token of tScalar: - yieldScalar(lex.content, true) + yieldScalar(lex.content, yTypeUnknown, true) level = ancestry.pop() assert level.mode == mBlockMapValue level.mode = mImplicitBlockMapKey @@ -787,12 +806,16 @@ proc parse*(parser: YamlSequentialParser, case token of tLineStart: discard - of tScalar, tScalarPart: - yieldScalar(lex.content, token == tScalar) + of tScalar: + yieldScalar(lex.content, yTypeUnknown, true) + level = ancestry.pop() + state = ypFlowAfterObject + of tScalarPart: + yieldScalar(lex.content, lex.typeHint) level = ancestry.pop() state = ypFlowAfterObject of tColon: - yieldScalar() + yieldScalar("", yTypeUnknown) level = ancestry.pop() if level.mode == mFlowMapKey: level.mode = mFlowMapValue @@ -802,7 +825,7 @@ proc parse*(parser: YamlSequentialParser, else: yieldUnexpectedToken("scalar, comma or map end") of tComma: - yieldScalar() + yieldScalar("", yTypeUnknown) level = ancestry.pop() case level.mode of mFlowMapValue: @@ -811,7 +834,7 @@ proc parse*(parser: YamlSequentialParser, level = DocumentLevel(mode: mUnknown, indicatorColumn: -1, indentationColumn: -1) of mFlowSequenceItem: - yieldScalar() + yieldScalar("", yTypeUnknown) else: yieldError("Internal error! Please report this bug.") of tOpeningBrace: @@ -832,7 +855,7 @@ proc parse*(parser: YamlSequentialParser, indentationColumn: -1) of tClosingBrace: if level.mode == mUnknown: - yieldScalar() + yieldScalar("", yTypeUnknown) level = ancestry.pop() if level.mode != mFlowMapValue: yieldUnexpectedToken() @@ -848,7 +871,7 @@ proc parse*(parser: YamlSequentialParser, state = ypExpectingDocumentEnd of tClosingBracket: if level.mode == mUnknown: - yieldScalar() + yieldScalar("", yTypeUnknown) level = ancestry.pop() if level.mode != mFlowSequenceItem: yieldUnexpectedToken() diff --git a/src/yaml.nim b/src/yaml.nim index 91a789d..3bd1a32 100644 --- a/src/yaml.nim +++ b/src/yaml.nim @@ -22,6 +22,7 @@ type scalarAnchor* : AnchorId scalarTag* : TagId scalarContent*: string # may not be nil (but empty) + scalarType* : YamlTypeHint of yamlEndMap, yamlEndSequence, yamlStartDocument, yamlEndDocument: discard of yamlAlias: @@ -37,7 +38,8 @@ type # interface -proc parse*(parser: YamlSequentialParser, s: Stream): iterator(): YamlParserEvent +proc parse*(parser: YamlSequentialParser, s: Stream): + iterator(): YamlParserEvent # implementation diff --git a/test/lexing.nim b/test/lexing.nim index ab99e68..f3ed533 100644 --- a/test/lexing.nim +++ b/test/lexing.nim @@ -48,7 +48,7 @@ template ensure(input: string, expected: openarray[BasicLexerToken]) = expected[i].kind, ")" proc t(kind: YamlLexerToken, content: string, - typeHint: YamlTypeHint = yTypeString): BasicLexerToken = + typeHint: YamlTypeHint = yTypeUnknown): BasicLexerToken = (kind: kind, content: content, typeHint: typeHint) suite "Lexing": @@ -210,10 +210,11 @@ foo: t(tVerbatimTag, "tag:http://example.com/str"), t(tScalarPart, "tagged"), t(tStreamEnd, nil)]) test "Lexing: Type hints": - ensure("false\nnull\nstring\n-13\n42.25\n-4e+3\n5.42e78", + ensure("false\nnull\nunknown\n\"string\"\n-13\n42.25\n-4e+3\n5.42e78", [t(tLineStart, ""), t(tScalarPart, "false", yTypeBoolean), t(tLineStart, ""), t(tScalarPart, "null", yTypeNull), - t(tLineStart, ""), t(tScalarPart, "string", yTypeString), + t(tLineStart, ""), t(tScalarPart, "unknown", yTypeUnknown), + t(tLineStart, ""), t(tScalar, "string", yTypeString), t(tLineStart, ""), t(tScalarPart, "-13", yTypeInteger), t(tLineStart, ""), t(tScalarPart, "42.25", yTypeFloat), t(tLineStart, ""), t(tScalarPart, "-4e+3", yTypeFloat), diff --git a/test/parsing.nim b/test/parsing.nim index 4757ce7..b318eaa 100644 --- a/test/parsing.nim +++ b/test/parsing.nim @@ -11,13 +11,20 @@ proc endDoc(): YamlParserEvent = new(result) result.kind = yamlEndDocument -proc scalar(content: string, tag: TagId = tagQuestionMark, - anchor: AnchorId = anchorNone): YamlParserEvent = +proc scalar(content: string, typeHint: YamlTypeHint, + tag: TagId = tagQuestionMark, anchor: AnchorId = anchorNone): + YamlParserEvent = new(result) result.kind = yamlScalar result.scalarAnchor = anchor result.scalarTag = tag result.scalarContent = content + result.scalarType = typeHint + +proc scalar(content: string, + tag: TagId = tagQuestionMark, anchor: AnchorId = anchorNone): + YamlParserEvent = + result = scalar(content, yTypeUnknown, tag, anchor) proc startSequence(tag: TagId = tagQuestionMark, anchor: AnchorId = anchorNone): @@ -79,6 +86,9 @@ proc printDifference(expected, actual: YamlParserEvent) = ", got ", cast[int](actual.scalarContent[i]), ")" break + elif expected.scalarType != actual.scalarType: + echo "[scalar] expected type hint ", expected.scalarType, + ", got ", actual.scalarType else: echo "[scalar] Unknown difference" of yamlStartMap, yamlStartSequence: @@ -121,22 +131,25 @@ suite "Parsing": test "Parsing: Simple Scalar": ensure("Scalar", startDoc(), scalar("Scalar"), endDoc()) test "Parsing: Simple Sequence": - ensure("- item", startDoc(), startSequence(), scalar("item"), - endSequence(), endDoc()) + ensure("- false", startDoc(), startSequence(), + scalar("false", yTypeBoolean), endSequence(), endDoc()) test "Parsing: Simple Map": - ensure("key: value\nkey2: value2", startDoc(), startMap(), - scalar("key"), scalar("value"), scalar("key2"), scalar("value2"), - endMap(), endDoc()) + ensure("42: value\nkey2: -7.5", startDoc(), startMap(), + scalar("42", yTypeInteger), scalar("value"), scalar("key2"), + scalar("-7.5", yTypeFloat), endMap(), endDoc()) test "Parsing: Explicit Map": - ensure("? key\n: value\n? key2\n: value2", startDoc(), startMap(), - scalar("key"), scalar("value"), scalar("key2"), scalar("value2"), + ensure("? null\n: value\n? true\n: value2", startDoc(), startMap(), + scalar("null", yTypeNull), scalar("value"), + scalar("true", yTypeBoolean), scalar("value2"), endMap(), endDoc()) test "Parsing: Mixed Map (explicit to implicit)": - ensure("? a\n: b\nc: d", startDoc(), startMap(), scalar("a"), - scalar("b"), scalar("c"), scalar("d"), endMap(), endDoc()) + ensure("? a\n: 13\n1.5: d", startDoc(), startMap(), scalar("a"), + scalar("13", yTypeInteger), scalar("1.5", yTypeFloat), + scalar("d"), endMap(), endDoc()) test "Parsing: Mixed Map (implicit to explicit)": - ensure("a: b\n? c\n: d", startDoc(), startMap(), scalar("a"), - scalar("b"), scalar("c"), scalar("d"), endMap(), endDoc()) + ensure("a: 4.2\n? 23\n: d", startDoc(), startMap(), scalar("a"), + scalar("4.2", yTypeFloat), scalar("23", yTypeInteger), + scalar("d"), endMap(), endDoc()) test "Parsing: Missing values in map": ensure("? a\n? b\nc:", startDoc(), startMap(), scalar("a"), scalar(""), scalar("b"), scalar(""), scalar("c"), scalar(""), endMap(), @@ -145,8 +158,8 @@ suite "Parsing": ensure(": a\n: b", startDoc(), startMap(), scalar(""), scalar("a"), scalar(""), scalar("b"), endMap(), endDoc()) test "Parsing: Multiline scalars in explicit map": - ensure("? a\n b\n: c\n d\n? e\n f", startDoc(), startMap(), - scalar("a b"), scalar("c d"), scalar("e f"), scalar(""), + ensure("? a\n true\n: null\n d\n? e\n 42", startDoc(), startMap(), + scalar("a true"), scalar("null d"), scalar("e 42"), scalar(""), endMap(), endDoc()) test "Parsing: Map in Sequence": ensure(" - key: value", startDoc(), startSequence(), startMap(), @@ -161,11 +174,12 @@ suite "Parsing": startSequence(), scalar("l1_i1"), scalar("l1_i2"), endSequence(), scalar("l2_i1"), endSequence(), endDoc()) test "Parsing: Flow Sequence": - ensure("[a, b]", startDoc(), startSequence(), scalar("a"), scalar("b"), - endSequence(), endDoc()) + ensure("[2, b]", startDoc(), startSequence(), scalar("2", yTypeInteger), + scalar("b"), endSequence(), endDoc()) test "Parsing: Flow Map": - ensure("{a: b, c: d}", startDoc(), startMap(), scalar("a"), scalar("b"), - scalar("c"), scalar("d"), endMap(), endDoc()) + ensure("{a: true, 1.337: d}", startDoc(), startMap(), scalar("a"), + scalar("true", yTypeBoolean), scalar("1.337", yTypeFloat), + scalar("d"), endMap(), endDoc()) test "Parsing: Flow Sequence in Flow Sequence": ensure("[a, [b, c]]", startDoc(), startSequence(), scalar("a"), startSequence(), scalar("b"), scalar("c"), endSequence(), @@ -202,7 +216,8 @@ suite "Parsing": ensure("a: |-\x0A ab\x0A \x0A \x0A", startDoc(), startMap(), scalar("a"), scalar("ab"), endMap(), endDoc()) test "Parsing: non-specific tags of quoted strings": - ensure("\"a\"", startDoc(), scalar("a", tagExclamationMark), endDoc()) + ensure("\"a\"", startDoc(), + scalar("a", yTypeString, tagExclamationMark), endDoc()) test "Parsing: explicit non-specific tag": ensure("! a", startDoc(), scalar("a", tagExclamationMark), endDoc()) test "Parsing: secondary tag handle resolution": @@ -287,14 +302,14 @@ suite "Parsing": alias(1.AnchorId), alias(0.AnchorId), startSequence(), scalar("c"), alias(1.AnchorId), scalar("d"), endSequence(), endMap(), endDoc()) - test "Parsing: tags on empty scalars": + test "Parsing: Tags on empty scalars": let idStr = parser.registerUri("tag:yaml.org,2002:str") idInt = parser.registerUri("tag:yaml.org,2002:int") ensure("!!str : a\nb: !!int\n!!str : !!str", startDoc(), startMap(), scalar("", idStr), scalar("a"), scalar("b"), scalar("", idInt), scalar("", idStr), scalar("", idStr), endMap(), endDoc()) - test "Parsing: anchors on empty scalars": + test "Parsing: Anchors on empty scalars": ensure("&a : a\nb: &b\n&c : &a", startDoc(), startMap(), scalar("", tagQuestionMark, 0.AnchorId), scalar("a"), scalar("b"), scalar("", tagQuestionMark, 1.AnchorId),