diff --git a/src/yaml/private/lexer.nim b/src/yaml/private/lexer.nim index e218077..37cb30a 100644 --- a/src/yaml/private/lexer.nim +++ b/src/yaml/private/lexer.nim @@ -28,7 +28,7 @@ type yamlLiteralScalar, yamlFoldedScalar, yamlBlockIndentationIndicator, yamlBlockChompingIndicator, # scalar content - yamlScalar, yamlBlockScalarLine, + yamlScalar, yamlScalarPart, # tags yamlVerbatimTag, yamlTagSuffix, # anchoring @@ -433,7 +433,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} = of ylPlainScalar: case c of EndOfFile, '\r', '\x0A': - yieldToken(yamlScalar) + yieldToken(yamlScalarPart) state = ylLineEnd continue of ':': @@ -459,7 +459,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} = if lastSpecialChar != '\0': case c of ' ', '\t', EndOfFile, '\r', '\x0A': - yieldToken(yamlScalar) + yieldToken(yamlScalarPart) state = ylInitialInLine else: my.content.add(trailingSpace) @@ -472,7 +472,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} = case c of EndOfFile, '\r', '\x0A': trailingSpace = "" - yieldToken(yamlScalar) + yieldToken(yamlScalarPart) state = ylLineEnd continue of ' ', '\t': @@ -814,7 +814,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} = of ylBlockScalar: case c of EndOfFile, '\r', '\x0A': - yieldToken(yamlBlockScalarLine) + yieldToken(yamlScalarPart) state = ylLineEnd continue else: diff --git a/src/yaml/sequential.nim b/src/yaml/sequential.nim index 2811c10..141380a 100644 --- a/src/yaml/sequential.nim +++ b/src/yaml/sequential.nim @@ -29,12 +29,13 @@ type YamlParserState = enum ylInitial, ylSkipDirective, ylBlockLineStart, ylBlockAfterTag, ylBlockAfterAnchor, ylBlockAfterScalar, ylBlockAfterColon, - ylBlockLineEnd, ylFlow, ylFlowAfterObject, ylExpectingDocumentEnd + ylBlockMultilineScalar, ylBlockLineEnd, ylFlow, ylFlowAfterObject, + ylExpectingDocumentEnd DocumentLevelMode = enum mBlockSequenceItem, mFlowSequenceItem, mExplicitBlockMapKey, mExplicitBlockMapValue, mImplicitBlockMapKey, mImplicitBlockMapValue, - mFlowMapKey, mFlowMapValue, mUnknown + mFlowMapKey, mFlowMapValue, mPlainScalar, mScalar, mUnknown DocumentLevel = object mode: DocumentLevelMode @@ -90,15 +91,26 @@ template closeLevel(lvl: DocumentLevel) {.dirty.} = yield YamlParserEvent(kind: yamlEndMap) of mBlockSequenceItem, mFlowSequenceItem: yield YamlParserEvent(kind: yamlEndSequence) + of mScalar: + yield YamlParserEvent(kind: yamlScalar, scalarAnchor: anchor, + scalarTag: tag, scalarContent: scalarCache) + anchor = nil + tag = nil else: yieldScalar() template leaveMoreIndentedLevels() {.dirty.} = - while level.indicatorColumn > lex.column or - (level.indicatorColumn == -1 and - level.indentationColumn > lex.column): - closeLevel(level) - level = ancestry.pop() + while ancestry.len > 0: + let parent = ancestry[ancestry.high] + if parent.indicatorColumn >= lex.column or + (parent.indicatorColumn == -1 and + parent.indentationColumn >= lex.column): + closeLevel(level) + level = ancestry.pop() + if level.mode == mImplicitBlockMapValue: + level.mode = mImplicitBlockMapKey + else: + break template closeAllLevels() {.dirty.} = while true: @@ -131,17 +143,25 @@ template handleBlockIndicator(expected, next: DocumentLevelMode, iterator events*(input: Stream): YamlParserEvent {.closure.} = var + # parsing state lex: YamlLexer + state = ylInitial + + # document state foundYamlDirective = false tagShorthands = initTable[string, string]() + + # object tree state ancestry = newSeq[DocumentLevel]() level = DocumentLevel(mode: mUnknown, indicatorColumn: -1, indentationColumn: -1) - cachedScalar: YamlParserEvent - cachedScalarIndentation: int + + # cached values tag: string = nil anchor: string = nil - state = ylInitial + scalarCache: string = nil + scalarIndentation: int + lex.open(input) var nextToken = tokens @@ -238,17 +258,32 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} = of yamlAnchor: anchor = lex.content state = ylBlockAfterAnchor + of yamlScalarPart: + leaveMoreIndentedLevels() + case level.mode + of mUnknown: + level.mode = mScalar + scalarCache = lex.content + scalarIndentation = lex.column + of mImplicitBlockMapKey: + scalarCache = lex.content + scalarIndentation = lex.column + of mImplicitBlockMapValue: + ancestry.add(level) + scalarCache = lex.content + scalarIndentation = lex.column + level = DocumentLevel(mode: mScalar, indicatorColumn: -1, + indentationColumn: + ancestry[ancestry.high].indentationColumn + 1) + else: + yieldError("Unexpected scalar") + state = ylBlockAfterScalar of lexer.yamlScalar: leaveMoreIndentedLevels() case level.mode of mUnknown, mImplicitBlockMapKey: - cachedScalar = YamlParserEvent(kind: yamlScalar, - scalarAnchor: anchor, - scalarTag: tag, - scalarContent: lex.content) - anchor = nil - tag = nil - cachedScalarIndentation = lex.column + scalarCache = lex.content + scalarIndentation = lex.column state = ylBlockAfterScalar else: yieldError("Unexpected scalar") @@ -267,35 +302,73 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} = state = ylFlow continue else: - yieldError("Unexpected token: " & $token) + yieldError("[block line start] Unexpected token: " & $token) + of ylBlockMultilineScalar: + case token + of yamlScalarPart: + leaveMoreIndentedLevels() + if level.mode != mScalar: + state = ylBlockLineStart + continue + scalarCache &= " " & lex.content + state = ylBlockLineEnd + of yamlLineStart: + discard + of yamlColon, yamlDash, yamlQuestionMark: + leaveMoreIndentedLevels() + if level.mode != mScalar: + state = ylBlockLineStart + continue + yieldError("[multiline scalar ?:-] Unexpected token: " & $token) + of yamlDocumentEnd, yamlStreamEnd: + closeAllLevels() + scalarCache = nil + state = ylExpectingDocumentEnd + continue + of yamlDirectivesEnd: + closeAllLevels() + state = ylInitial + continue + else: + yieldError("[multiline scalar] Unexpected token: " & $token) of ylBlockAfterScalar: case token of yamlColon: - assert level.mode == mUnknown or - level.mode == mImplicitBlockMapKey - if level.mode == mUnknown: - level.indentationColumn = cachedScalarIndentation + assert level.mode in [mUnknown, mImplicitBlockMapKey, mScalar] + if level.mode in [mUnknown, mScalar]: + level.indentationColumn = scalarIndentation yieldStart(yamlStartMap) level.mode = mImplicitBlockMapValue ancestry.add(level) level = DocumentLevel(mode: mUnknown, indicatorColumn: -1, indentationColumn: -1) - yield cachedScalar - cachedScalar = nil + yield YamlParserEvent(kind: yamlScalar, + scalarAnchor: anchor, + scalarTag: tag, + scalarContent: scalarCache) + scalarCache = nil state = ylBlockAfterColon of yamlLineStart: if level.mode == mImplicitBlockMapKey: yieldError("Missing colon after implicit map key") - yield cachedScalar - cachedScalar = nil - if ancestry.len > 0: - level = ancestry.pop() - state = ylBlockLineStart + if level.mode != mScalar: + yield YamlParserEvent(kind: yamlScalar, + scalarAnchor: anchor, + scalarTag: tag, + scalarContent: scalarCache) + scalarCache = nil + if ancestry.len > 0: + level = ancestry.pop() + else: + state = ylExpectingDocumentEnd else: - state = ylExpectingDocumentEnd + state = ylBlockMultilineScalar of yamlStreamEnd: - yield cachedScalar - cachedScalar = nil + yield YamlParserEvent(kind: yamlScalar, + scalarAnchor: anchor, + scalarTag: tag, + scalarContent: scalarCache) + scalarCache = nil if ancestry.len > 0: level = ancestry.pop() closeAllLevels() @@ -339,6 +412,16 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} = assert level.mode == mImplicitBlockMapValue level.mode = mImplicitBlockMapKey state = ylBlockLineEnd + of yamlScalarPart: + level.mode = mScalar + scalarCache = lex.content + if ancestry[ancestry.high].indicatorColumn != -1: + level.indentationColumn = + ancestry[ancestry.high].indicatorColumn + 1 + else: + level.indentationColumn = + ancestry[ancestry.high].indentationColumn + 1 + state = ylBlockLineEnd of yamlLineStart: state = ylBlockLineStart of yamlStreamEnd: @@ -354,7 +437,8 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} = of ylBlockLineEnd: case token of yamlLineStart: - state = ylBlockLineStart + state = if level.mode == mScalar: ylBlockMultilineScalar else: + ylBlockLineStart of yamlStreamEnd: closeAllLevels() yield YamlParserEvent(kind: yamlEndDocument) @@ -365,7 +449,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} = case token of yamlLineStart: discard - of lexer.yamlScalar: + of lexer.yamlScalar, yamlScalarPart: yieldScalar(lex.content) level = ancestry.pop() state = ylFlowAfterObject diff --git a/test/lexing.nim b/test/lexing.nim index 55936db..1b723da 100644 --- a/test/lexing.nim +++ b/test/lexing.nim @@ -81,7 +81,7 @@ suite "Lexing": test "Lexing: Directive after Document End": ensure("content\n...\n%YAML 1.2", [t(yamlLineStart, ""), - t(yamlScalar, "content"), + t(yamlScalarPart, "content"), t(yamlLineStart, ""), t(yamlDocumentEnd, nil), t(yamlYamlDirective, nil), @@ -91,24 +91,24 @@ suite "Lexing": test "Lexing: Plain Scalar (alphanumeric)": ensure("abA03rel4", [t(yamlLineStart, ""), - t(yamlScalar, "abA03rel4"), + t(yamlScalarPart, "abA03rel4"), t(yamlStreamEnd, nil)]) test "Lexing: Plain Scalar (with spaces)": ensure("test content", [t(yamlLineStart, ""), - t(yamlScalar, "test content"), + t(yamlScalarPart, "test content"), t(yamlStreamEnd, nil)]) test "Lexing: Plain Scalar (with special chars)": ensure(":test ?content -with #special !chars", [t(yamlLineStart, nil), - t(yamlScalar, ":test ?content -with #special !chars"), + t(yamlScalarPart, ":test ?content -with #special !chars"), t(yamlStreamEnd, nil)]) test "Lexing: Plain Scalar (starting with %)": ensure("---\n%test", [t(yamlDirectivesEnd, nil), t(yamlLineStart, ""), - t(yamlScalar, "%test"), + t(yamlScalarPart, "%test"), t(yamlStreamEnd, nil)]) test "Lexing: Single Quoted Scalar": @@ -141,24 +141,25 @@ suite "Lexing": test "Lexing: Block Array": ensure(""" - a -- b""", [t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalar, "a"), - t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalar, "b"), +- b""", [t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalarPart, "a"), + t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalarPart, "b"), t(yamlStreamEnd, nil)]) test "Lexing: Block Map with Implicit Keys": ensure(""" foo: bar -herp: derp""", [t(yamlLineStart, ""), t(yamlScalar, "foo"), t(yamlColon, nil), - t(yamlScalar, "bar"), t(yamlLineStart, ""), - t(yamlScalar, "herp"), t(yamlColon, nil), t(yamlScalar, "derp"), +herp: derp""", [t(yamlLineStart, ""), t(yamlScalarPart, "foo"), + t(yamlColon, nil), t(yamlScalarPart, "bar"), + t(yamlLineStart, ""), t(yamlScalarPart, "herp"), + t(yamlColon, nil), t(yamlScalarPart, "derp"), t(yamlStreamEnd, nil)]) test "Lexing: Block Map with Explicit Keys": ensure(""" ? foo -: bar""", [t(yamlLineStart, ""), t(yamlQuestionmark, nil), t(yamlScalar, "foo"), - t(yamlLineStart, ""), t(yamlColon, nil), t(yamlScalar, "bar"), - t(yamlStreamEnd, nil)]) +: bar""", [t(yamlLineStart, ""), t(yamlQuestionmark, nil), + t(yamlScalarPart, "foo"), t(yamlLineStart, ""), t(yamlColon, nil), + t(yamlScalarPart, "bar"), t(yamlStreamEnd, nil)]) test "Lexing: Indentation": ensure(""" @@ -167,30 +168,31 @@ foo: - baz - biz herp: derp""", - [t(yamlLineStart, ""), t(yamlScalar, "foo"), t(yamlColon, nil), - t(yamlLineStart, " "), t(yamlScalar, "bar"), t(yamlColon, nil), - t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalar, "baz"), - t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalar, "biz"), - t(yamlLineStart, " "), t(yamlScalar, "herp"), t(yamlColon, nil), - t(yamlScalar, "derp"), t(yamlStreamEnd, nil)]) + [t(yamlLineStart, ""), t(yamlScalarPart, "foo"), t(yamlColon, nil), + t(yamlLineStart, " "), t(yamlScalarPart, "bar"), t(yamlColon, nil), + t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalarPart, "baz"), + t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalarPart, "biz"), + t(yamlLineStart, " "), t(yamlScalarPart, "herp"), t(yamlColon, nil), + t(yamlScalarPart, "derp"), t(yamlStreamEnd, nil)]) test "Lexing: Anchor": - ensure("foo: &bar", [t(yamlLineStart, ""), t(yamlScalar, "foo"), + ensure("foo: &bar", [t(yamlLineStart, ""), t(yamlScalarPart, "foo"), t(yamlColon, nil), t(yamlAnchor, "bar"), t(yamlStreamEnd, nil)]) test "Lexing: Alias": - ensure("foo: *bar", [t(yamlLineStart, ""), t(yamlScalar, "foo"), + ensure("foo: *bar", [t(yamlLineStart, ""), t(yamlScalarPart, "foo"), t(yamlColon, nil), t(yamlAlias, "bar"), t(yamlStreamEnd, nil)]) test "Lexing: Tag handle": ensure("!t!str tagged", [t(yamlLineStart, ""), t(yamlTagHandle, "!t!"), t(yamlTagSuffix, "str"), - t(yamlScalar, "tagged"), t(yamlStreamEnd, nil)]) + t(yamlScalarPart, "tagged"), + t(yamlStreamEnd, nil)]) test "Lexing: Verbatim tag handle": ensure("! tagged", [t(yamlLineStart, ""), t(yamlVerbatimTag, "tag:http://example.com/str"), - t(yamlScalar, "tagged"), t(yamlStreamEnd, nil)]) \ No newline at end of file + t(yamlScalarPart, "tagged"), t(yamlStreamEnd, nil)]) \ No newline at end of file diff --git a/test/parsing.nim b/test/parsing.nim index 18bffde..0d9f31f 100644 --- a/test/parsing.nim +++ b/test/parsing.nim @@ -127,4 +127,9 @@ suite "Parsing": endMap(), endDoc()) test "Parsing: Flow Map in Sequence": ensure("- {a: b}", startDoc(), startSequence(), startMap(), scalar("a"), - scalar("b"), endMap(), endSequence(), endDoc()) \ No newline at end of file + scalar("b"), endMap(), endSequence(), endDoc()) + test "Parsing: Multiline scalar (top level)": + ensure("a\nb \n c\nd", startDoc(), scalar("a b c d"), endDoc()) + test "Parsing: Multiline scalar (in map)": + ensure("a: b\n c\nd:\n e\n f", startDoc(), startMap(), scalar("a"), + scalar("b c"), scalar("d"), scalar("e f"), endMap(), endDoc()) \ No newline at end of file