Added support for multiline plain scalars

This commit is contained in:
Felix Krause 2015-12-14 21:26:34 +01:00
parent b56f9cb537
commit 3ca3081e92
4 changed files with 154 additions and 63 deletions

View File

@ -28,7 +28,7 @@ type
yamlLiteralScalar, yamlFoldedScalar, yamlLiteralScalar, yamlFoldedScalar,
yamlBlockIndentationIndicator, yamlBlockChompingIndicator, yamlBlockIndentationIndicator, yamlBlockChompingIndicator,
# scalar content # scalar content
yamlScalar, yamlBlockScalarLine, yamlScalar, yamlScalarPart,
# tags # tags
yamlVerbatimTag, yamlTagSuffix, yamlVerbatimTag, yamlTagSuffix,
# anchoring # anchoring
@ -433,7 +433,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylPlainScalar: of ylPlainScalar:
case c case c
of EndOfFile, '\r', '\x0A': of EndOfFile, '\r', '\x0A':
yieldToken(yamlScalar) yieldToken(yamlScalarPart)
state = ylLineEnd state = ylLineEnd
continue continue
of ':': of ':':
@ -459,7 +459,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
if lastSpecialChar != '\0': if lastSpecialChar != '\0':
case c case c
of ' ', '\t', EndOfFile, '\r', '\x0A': of ' ', '\t', EndOfFile, '\r', '\x0A':
yieldToken(yamlScalar) yieldToken(yamlScalarPart)
state = ylInitialInLine state = ylInitialInLine
else: else:
my.content.add(trailingSpace) my.content.add(trailingSpace)
@ -472,7 +472,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
case c case c
of EndOfFile, '\r', '\x0A': of EndOfFile, '\r', '\x0A':
trailingSpace = "" trailingSpace = ""
yieldToken(yamlScalar) yieldToken(yamlScalarPart)
state = ylLineEnd state = ylLineEnd
continue continue
of ' ', '\t': of ' ', '\t':
@ -814,7 +814,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylBlockScalar: of ylBlockScalar:
case c case c
of EndOfFile, '\r', '\x0A': of EndOfFile, '\r', '\x0A':
yieldToken(yamlBlockScalarLine) yieldToken(yamlScalarPart)
state = ylLineEnd state = ylLineEnd
continue continue
else: else:

View File

@ -29,12 +29,13 @@ type
YamlParserState = enum YamlParserState = enum
ylInitial, ylSkipDirective, ylBlockLineStart, ylBlockAfterTag, ylInitial, ylSkipDirective, ylBlockLineStart, ylBlockAfterTag,
ylBlockAfterAnchor, ylBlockAfterScalar, ylBlockAfterColon, ylBlockAfterAnchor, ylBlockAfterScalar, ylBlockAfterColon,
ylBlockLineEnd, ylFlow, ylFlowAfterObject, ylExpectingDocumentEnd ylBlockMultilineScalar, ylBlockLineEnd, ylFlow, ylFlowAfterObject,
ylExpectingDocumentEnd
DocumentLevelMode = enum DocumentLevelMode = enum
mBlockSequenceItem, mFlowSequenceItem, mExplicitBlockMapKey, mBlockSequenceItem, mFlowSequenceItem, mExplicitBlockMapKey,
mExplicitBlockMapValue, mImplicitBlockMapKey, mImplicitBlockMapValue, mExplicitBlockMapValue, mImplicitBlockMapKey, mImplicitBlockMapValue,
mFlowMapKey, mFlowMapValue, mUnknown mFlowMapKey, mFlowMapValue, mPlainScalar, mScalar, mUnknown
DocumentLevel = object DocumentLevel = object
mode: DocumentLevelMode mode: DocumentLevelMode
@ -90,15 +91,26 @@ template closeLevel(lvl: DocumentLevel) {.dirty.} =
yield YamlParserEvent(kind: yamlEndMap) yield YamlParserEvent(kind: yamlEndMap)
of mBlockSequenceItem, mFlowSequenceItem: of mBlockSequenceItem, mFlowSequenceItem:
yield YamlParserEvent(kind: yamlEndSequence) yield YamlParserEvent(kind: yamlEndSequence)
of mScalar:
yield YamlParserEvent(kind: yamlScalar, scalarAnchor: anchor,
scalarTag: tag, scalarContent: scalarCache)
anchor = nil
tag = nil
else: else:
yieldScalar() yieldScalar()
template leaveMoreIndentedLevels() {.dirty.} = template leaveMoreIndentedLevels() {.dirty.} =
while level.indicatorColumn > lex.column or while ancestry.len > 0:
(level.indicatorColumn == -1 and let parent = ancestry[ancestry.high]
level.indentationColumn > lex.column): if parent.indicatorColumn >= lex.column or
closeLevel(level) (parent.indicatorColumn == -1 and
level = ancestry.pop() parent.indentationColumn >= lex.column):
closeLevel(level)
level = ancestry.pop()
if level.mode == mImplicitBlockMapValue:
level.mode = mImplicitBlockMapKey
else:
break
template closeAllLevels() {.dirty.} = template closeAllLevels() {.dirty.} =
while true: while true:
@ -131,17 +143,25 @@ template handleBlockIndicator(expected, next: DocumentLevelMode,
iterator events*(input: Stream): YamlParserEvent {.closure.} = iterator events*(input: Stream): YamlParserEvent {.closure.} =
var var
# parsing state
lex: YamlLexer lex: YamlLexer
state = ylInitial
# document state
foundYamlDirective = false foundYamlDirective = false
tagShorthands = initTable[string, string]() tagShorthands = initTable[string, string]()
# object tree state
ancestry = newSeq[DocumentLevel]() ancestry = newSeq[DocumentLevel]()
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1, level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1) indentationColumn: -1)
cachedScalar: YamlParserEvent
cachedScalarIndentation: int # cached values
tag: string = nil tag: string = nil
anchor: string = nil anchor: string = nil
state = ylInitial scalarCache: string = nil
scalarIndentation: int
lex.open(input) lex.open(input)
var nextToken = tokens var nextToken = tokens
@ -238,17 +258,32 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
of yamlAnchor: of yamlAnchor:
anchor = lex.content anchor = lex.content
state = ylBlockAfterAnchor state = ylBlockAfterAnchor
of yamlScalarPart:
leaveMoreIndentedLevels()
case level.mode
of mUnknown:
level.mode = mScalar
scalarCache = lex.content
scalarIndentation = lex.column
of mImplicitBlockMapKey:
scalarCache = lex.content
scalarIndentation = lex.column
of mImplicitBlockMapValue:
ancestry.add(level)
scalarCache = lex.content
scalarIndentation = lex.column
level = DocumentLevel(mode: mScalar, indicatorColumn: -1,
indentationColumn:
ancestry[ancestry.high].indentationColumn + 1)
else:
yieldError("Unexpected scalar")
state = ylBlockAfterScalar
of lexer.yamlScalar: of lexer.yamlScalar:
leaveMoreIndentedLevels() leaveMoreIndentedLevels()
case level.mode case level.mode
of mUnknown, mImplicitBlockMapKey: of mUnknown, mImplicitBlockMapKey:
cachedScalar = YamlParserEvent(kind: yamlScalar, scalarCache = lex.content
scalarAnchor: anchor, scalarIndentation = lex.column
scalarTag: tag,
scalarContent: lex.content)
anchor = nil
tag = nil
cachedScalarIndentation = lex.column
state = ylBlockAfterScalar state = ylBlockAfterScalar
else: else:
yieldError("Unexpected scalar") yieldError("Unexpected scalar")
@ -267,35 +302,73 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
state = ylFlow state = ylFlow
continue continue
else: else:
yieldError("Unexpected token: " & $token) yieldError("[block line start] Unexpected token: " & $token)
of ylBlockMultilineScalar:
case token
of yamlScalarPart:
leaveMoreIndentedLevels()
if level.mode != mScalar:
state = ylBlockLineStart
continue
scalarCache &= " " & lex.content
state = ylBlockLineEnd
of yamlLineStart:
discard
of yamlColon, yamlDash, yamlQuestionMark:
leaveMoreIndentedLevels()
if level.mode != mScalar:
state = ylBlockLineStart
continue
yieldError("[multiline scalar ?:-] Unexpected token: " & $token)
of yamlDocumentEnd, yamlStreamEnd:
closeAllLevels()
scalarCache = nil
state = ylExpectingDocumentEnd
continue
of yamlDirectivesEnd:
closeAllLevels()
state = ylInitial
continue
else:
yieldError("[multiline scalar] Unexpected token: " & $token)
of ylBlockAfterScalar: of ylBlockAfterScalar:
case token case token
of yamlColon: of yamlColon:
assert level.mode == mUnknown or assert level.mode in [mUnknown, mImplicitBlockMapKey, mScalar]
level.mode == mImplicitBlockMapKey if level.mode in [mUnknown, mScalar]:
if level.mode == mUnknown: level.indentationColumn = scalarIndentation
level.indentationColumn = cachedScalarIndentation
yieldStart(yamlStartMap) yieldStart(yamlStartMap)
level.mode = mImplicitBlockMapValue level.mode = mImplicitBlockMapValue
ancestry.add(level) ancestry.add(level)
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1, level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1) indentationColumn: -1)
yield cachedScalar yield YamlParserEvent(kind: yamlScalar,
cachedScalar = nil scalarAnchor: anchor,
scalarTag: tag,
scalarContent: scalarCache)
scalarCache = nil
state = ylBlockAfterColon state = ylBlockAfterColon
of yamlLineStart: of yamlLineStart:
if level.mode == mImplicitBlockMapKey: if level.mode == mImplicitBlockMapKey:
yieldError("Missing colon after implicit map key") yieldError("Missing colon after implicit map key")
yield cachedScalar if level.mode != mScalar:
cachedScalar = nil yield YamlParserEvent(kind: yamlScalar,
if ancestry.len > 0: scalarAnchor: anchor,
level = ancestry.pop() scalarTag: tag,
state = ylBlockLineStart scalarContent: scalarCache)
scalarCache = nil
if ancestry.len > 0:
level = ancestry.pop()
else:
state = ylExpectingDocumentEnd
else: else:
state = ylExpectingDocumentEnd state = ylBlockMultilineScalar
of yamlStreamEnd: of yamlStreamEnd:
yield cachedScalar yield YamlParserEvent(kind: yamlScalar,
cachedScalar = nil scalarAnchor: anchor,
scalarTag: tag,
scalarContent: scalarCache)
scalarCache = nil
if ancestry.len > 0: if ancestry.len > 0:
level = ancestry.pop() level = ancestry.pop()
closeAllLevels() closeAllLevels()
@ -339,6 +412,16 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
assert level.mode == mImplicitBlockMapValue assert level.mode == mImplicitBlockMapValue
level.mode = mImplicitBlockMapKey level.mode = mImplicitBlockMapKey
state = ylBlockLineEnd state = ylBlockLineEnd
of yamlScalarPart:
level.mode = mScalar
scalarCache = lex.content
if ancestry[ancestry.high].indicatorColumn != -1:
level.indentationColumn =
ancestry[ancestry.high].indicatorColumn + 1
else:
level.indentationColumn =
ancestry[ancestry.high].indentationColumn + 1
state = ylBlockLineEnd
of yamlLineStart: of yamlLineStart:
state = ylBlockLineStart state = ylBlockLineStart
of yamlStreamEnd: of yamlStreamEnd:
@ -354,7 +437,8 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
of ylBlockLineEnd: of ylBlockLineEnd:
case token case token
of yamlLineStart: of yamlLineStart:
state = ylBlockLineStart state = if level.mode == mScalar: ylBlockMultilineScalar else:
ylBlockLineStart
of yamlStreamEnd: of yamlStreamEnd:
closeAllLevels() closeAllLevels()
yield YamlParserEvent(kind: yamlEndDocument) yield YamlParserEvent(kind: yamlEndDocument)
@ -365,7 +449,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
case token case token
of yamlLineStart: of yamlLineStart:
discard discard
of lexer.yamlScalar: of lexer.yamlScalar, yamlScalarPart:
yieldScalar(lex.content) yieldScalar(lex.content)
level = ancestry.pop() level = ancestry.pop()
state = ylFlowAfterObject state = ylFlowAfterObject

View File

@ -81,7 +81,7 @@ suite "Lexing":
test "Lexing: Directive after Document End": test "Lexing: Directive after Document End":
ensure("content\n...\n%YAML 1.2", ensure("content\n...\n%YAML 1.2",
[t(yamlLineStart, ""), [t(yamlLineStart, ""),
t(yamlScalar, "content"), t(yamlScalarPart, "content"),
t(yamlLineStart, ""), t(yamlLineStart, ""),
t(yamlDocumentEnd, nil), t(yamlDocumentEnd, nil),
t(yamlYamlDirective, nil), t(yamlYamlDirective, nil),
@ -91,24 +91,24 @@ suite "Lexing":
test "Lexing: Plain Scalar (alphanumeric)": test "Lexing: Plain Scalar (alphanumeric)":
ensure("abA03rel4", [t(yamlLineStart, ""), ensure("abA03rel4", [t(yamlLineStart, ""),
t(yamlScalar, "abA03rel4"), t(yamlScalarPart, "abA03rel4"),
t(yamlStreamEnd, nil)]) t(yamlStreamEnd, nil)])
test "Lexing: Plain Scalar (with spaces)": test "Lexing: Plain Scalar (with spaces)":
ensure("test content", [t(yamlLineStart, ""), ensure("test content", [t(yamlLineStart, ""),
t(yamlScalar, "test content"), t(yamlScalarPart, "test content"),
t(yamlStreamEnd, nil)]) t(yamlStreamEnd, nil)])
test "Lexing: Plain Scalar (with special chars)": test "Lexing: Plain Scalar (with special chars)":
ensure(":test ?content -with #special !chars", ensure(":test ?content -with #special !chars",
[t(yamlLineStart, nil), [t(yamlLineStart, nil),
t(yamlScalar, ":test ?content -with #special !chars"), t(yamlScalarPart, ":test ?content -with #special !chars"),
t(yamlStreamEnd, nil)]) t(yamlStreamEnd, nil)])
test "Lexing: Plain Scalar (starting with %)": test "Lexing: Plain Scalar (starting with %)":
ensure("---\n%test", [t(yamlDirectivesEnd, nil), ensure("---\n%test", [t(yamlDirectivesEnd, nil),
t(yamlLineStart, ""), t(yamlLineStart, ""),
t(yamlScalar, "%test"), t(yamlScalarPart, "%test"),
t(yamlStreamEnd, nil)]) t(yamlStreamEnd, nil)])
test "Lexing: Single Quoted Scalar": test "Lexing: Single Quoted Scalar":
@ -141,24 +141,25 @@ suite "Lexing":
test "Lexing: Block Array": test "Lexing: Block Array":
ensure(""" ensure("""
- a - a
- b""", [t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalar, "a"), - b""", [t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalarPart, "a"),
t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalar, "b"), t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalarPart, "b"),
t(yamlStreamEnd, nil)]) t(yamlStreamEnd, nil)])
test "Lexing: Block Map with Implicit Keys": test "Lexing: Block Map with Implicit Keys":
ensure(""" ensure("""
foo: bar foo: bar
herp: derp""", [t(yamlLineStart, ""), t(yamlScalar, "foo"), t(yamlColon, nil), herp: derp""", [t(yamlLineStart, ""), t(yamlScalarPart, "foo"),
t(yamlScalar, "bar"), t(yamlLineStart, ""), t(yamlColon, nil), t(yamlScalarPart, "bar"),
t(yamlScalar, "herp"), t(yamlColon, nil), t(yamlScalar, "derp"), t(yamlLineStart, ""), t(yamlScalarPart, "herp"),
t(yamlColon, nil), t(yamlScalarPart, "derp"),
t(yamlStreamEnd, nil)]) t(yamlStreamEnd, nil)])
test "Lexing: Block Map with Explicit Keys": test "Lexing: Block Map with Explicit Keys":
ensure(""" ensure("""
? foo ? foo
: bar""", [t(yamlLineStart, ""), t(yamlQuestionmark, nil), t(yamlScalar, "foo"), : bar""", [t(yamlLineStart, ""), t(yamlQuestionmark, nil),
t(yamlLineStart, ""), t(yamlColon, nil), t(yamlScalar, "bar"), t(yamlScalarPart, "foo"), t(yamlLineStart, ""), t(yamlColon, nil),
t(yamlStreamEnd, nil)]) t(yamlScalarPart, "bar"), t(yamlStreamEnd, nil)])
test "Lexing: Indentation": test "Lexing: Indentation":
ensure(""" ensure("""
@ -167,30 +168,31 @@ foo:
- baz - baz
- biz - biz
herp: derp""", herp: derp""",
[t(yamlLineStart, ""), t(yamlScalar, "foo"), t(yamlColon, nil), [t(yamlLineStart, ""), t(yamlScalarPart, "foo"), t(yamlColon, nil),
t(yamlLineStart, " "), t(yamlScalar, "bar"), t(yamlColon, nil), t(yamlLineStart, " "), t(yamlScalarPart, "bar"), t(yamlColon, nil),
t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalar, "baz"), t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalarPart, "baz"),
t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalar, "biz"), t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalarPart, "biz"),
t(yamlLineStart, " "), t(yamlScalar, "herp"), t(yamlColon, nil), t(yamlLineStart, " "), t(yamlScalarPart, "herp"), t(yamlColon, nil),
t(yamlScalar, "derp"), t(yamlStreamEnd, nil)]) t(yamlScalarPart, "derp"), t(yamlStreamEnd, nil)])
test "Lexing: Anchor": test "Lexing: Anchor":
ensure("foo: &bar", [t(yamlLineStart, ""), t(yamlScalar, "foo"), ensure("foo: &bar", [t(yamlLineStart, ""), t(yamlScalarPart, "foo"),
t(yamlColon, nil), t(yamlAnchor, "bar"), t(yamlColon, nil), t(yamlAnchor, "bar"),
t(yamlStreamEnd, nil)]) t(yamlStreamEnd, nil)])
test "Lexing: Alias": test "Lexing: Alias":
ensure("foo: *bar", [t(yamlLineStart, ""), t(yamlScalar, "foo"), ensure("foo: *bar", [t(yamlLineStart, ""), t(yamlScalarPart, "foo"),
t(yamlColon, nil), t(yamlAlias, "bar"), t(yamlColon, nil), t(yamlAlias, "bar"),
t(yamlStreamEnd, nil)]) t(yamlStreamEnd, nil)])
test "Lexing: Tag handle": test "Lexing: Tag handle":
ensure("!t!str tagged", [t(yamlLineStart, ""), t(yamlTagHandle, "!t!"), ensure("!t!str tagged", [t(yamlLineStart, ""), t(yamlTagHandle, "!t!"),
t(yamlTagSuffix, "str"), t(yamlTagSuffix, "str"),
t(yamlScalar, "tagged"), t(yamlStreamEnd, nil)]) t(yamlScalarPart, "tagged"),
t(yamlStreamEnd, nil)])
test "Lexing: Verbatim tag handle": test "Lexing: Verbatim tag handle":
ensure("!<tag:http://example.com/str> tagged", ensure("!<tag:http://example.com/str> tagged",
[t(yamlLineStart, ""), [t(yamlLineStart, ""),
t(yamlVerbatimTag, "tag:http://example.com/str"), t(yamlVerbatimTag, "tag:http://example.com/str"),
t(yamlScalar, "tagged"), t(yamlStreamEnd, nil)]) t(yamlScalarPart, "tagged"), t(yamlStreamEnd, nil)])

View File

@ -127,4 +127,9 @@ suite "Parsing":
endMap(), endDoc()) endMap(), endDoc())
test "Parsing: Flow Map in Sequence": test "Parsing: Flow Map in Sequence":
ensure("- {a: b}", startDoc(), startSequence(), startMap(), scalar("a"), ensure("- {a: b}", startDoc(), startSequence(), startMap(), scalar("a"),
scalar("b"), endMap(), endSequence(), endDoc()) scalar("b"), endMap(), endSequence(), endDoc())
test "Parsing: Multiline scalar (top level)":
ensure("a\nb \n c\nd", startDoc(), scalar("a b c d"), endDoc())
test "Parsing: Multiline scalar (in map)":
ensure("a: b\n c\nd:\n e\n f", startDoc(), startMap(), scalar("a"),
scalar("b c"), scalar("d"), scalar("e f"), endMap(), endDoc())