NimYAML/src/yaml/sequential.nim

604 lines
24 KiB
Nim
Raw Normal View History

2015-12-05 11:10:17 +00:00
import streams, tables, strutils
import "private/lexer"
2015-12-05 11:10:17 +00:00
type
  YamlParserEventKind* = enum
    ## Discriminator for the events emitted by the ``events`` iterator.
    yamlStartDocument,
    yamlEndDocument,
    yamlStartMap,
    yamlEndMap,
    yamlStartSequence,
    yamlEndSequence,
    yamlScalar,
    yamlAlias,
    yamlError,
    yamlWarning

  YamlParserEvent* = ref object
    ## A single parser event. Which fields are available depends on ``kind``.
    case kind*: YamlParserEventKind
    of yamlStartMap, yamlStartSequence:
      objAnchor*: string
        ## may be nil, may not be empty
      objTag*: string
        ## may not be nil or empty, is a complete URI.
    of yamlScalar:
      scalarAnchor*: string
        ## may be nil
      scalarTag*: string
        ## may not be nil, is a complete URI.
      scalarContent*: string
        ## may not be nil (but empty)
    of yamlEndMap, yamlEndSequence, yamlStartDocument, yamlEndDocument:
      discard
    of yamlAlias:
      aliasName*: string
        ## may not be nil nor empty
    of yamlError, yamlWarning:
      description*: string
      line*: int
      column*: int

  YamlParserState = enum
    # States of the state machine driven by the ``events`` iterator.
    ylInitial,
    ylSkipDirective,
    ylBlockLineStart,
    ylBlockAfterTag,
    ylBlockAfterAnchor,
    ylBlockAfterScalar,
    ylBlockAfterColon,
    ylBlockMultilineScalar,
    ylBlockLineEnd,
    ylFlow,
    ylFlowAfterObject,
    ylExpectingDocumentEnd

  DocumentLevelMode = enum
    # What kind of node a nesting level currently holds / expects next.
    mBlockSequenceItem,
    mFlowSequenceItem,
    mExplicitBlockMapKey,
    mExplicitBlockMapValue,
    mImplicitBlockMapKey,
    mImplicitBlockMapValue,
    mFlowMapKey,
    mFlowMapValue,
    mPlainScalar,
    mScalar,
    mUnknown

  DocumentLevel = object
    # One entry of the parser's nesting stack. The parser uses -1 in either
    # column field to mean "not set".
    mode: DocumentLevelMode
    indicatorColumn: int
    indentationColumn: int
proc `==`*(left: YamlParserEvent, right: YamlParserEvent): bool =
  ## Structural equality of two parser events.
  ##
  ## Two events are equal when they have the same ``kind`` and all fields
  ## belonging to that kind compare equal. Events that carry no payload
  ## (document start/end, map end, sequence end) are equal whenever their
  ## kinds match.
  ##
  ## Because this overload replaces reference comparison for
  ## ``YamlParserEvent``, it is also what ``ev == nil`` resolves to. nil
  ## operands are therefore handled explicitly: nil equals only nil.
  if left.isNil or right.isNil:
    # isNil is used instead of `== nil`, which would recurse into this proc.
    return left.isNil and right.isNil
  if left.kind != right.kind:
    return false
  case left.kind
  of yamlStartDocument, yamlEndDocument, yamlEndMap, yamlEndSequence:
    result = true
  of yamlStartMap, yamlStartSequence:
    result = left.objAnchor == right.objAnchor and
             left.objTag == right.objTag
  of yamlScalar:
    result = left.scalarAnchor == right.scalarAnchor and
             left.scalarTag == right.scalarTag and
             left.scalarContent == right.scalarContent
  of yamlAlias:
    result = left.aliasName == right.aliasName
  of yamlError, yamlWarning:
    result = left.description == right.description and
             left.line == right.line and left.column == right.column
2015-12-05 11:10:17 +00:00
template yieldWarning(d: string) {.dirty.} =
  # Emits a yamlWarning event carrying the message `d` and the lexer's
  # current line/column. Parsing continues afterwards.
  # Dirty template: relies on `lex` being in scope at the call site and must
  # be expanded inside an iterator (it uses `yield`).
  yield YamlParserEvent(
    kind: yamlWarning,
    description: d,
    line: lex.line,
    column: lex.column)
template yieldError(d: string) {.dirty.} =
  # Emits a yamlError event carrying the message `d` and the lexer's current
  # line/column, then aborts parsing by leaving the enclosing `parserLoop`
  # block. Nothing placed after a yieldError(...) call is executed.
  # Dirty template: relies on `lex` and a surrounding `block parserLoop`.
  yield YamlParserEvent(
    kind: yamlError,
    description: d,
    line: lex.line,
    column: lex.column)
  break parserLoop
2015-12-05 11:10:17 +00:00
2015-12-11 21:55:21 +00:00
template yieldScalar(content: string = "") {.dirty.} =
  # Emits a yamlScalar event with the given content (empty by default),
  # attaching the currently cached `anchor` and `tag`, and then clears both
  # so they are not applied to a later node.
  # Dirty template: reads and writes `anchor` and `tag` of the call site.
  yield YamlParserEvent(
    kind: yamlScalar,
    scalarAnchor: anchor,
    scalarTag: tag,
    scalarContent: content)
  anchor = nil
  tag = nil
2015-12-05 11:10:17 +00:00
2015-12-11 21:55:21 +00:00
template yieldStart(k: YamlParserEventKind) {.dirty.} =
  # Emits a collection-start event of kind `k` (yamlStartMap or
  # yamlStartSequence; other kinds have no objAnchor/objTag fields),
  # attaching the currently cached `anchor` and `tag`, and then clears both.
  # Dirty template: reads and writes `anchor` and `tag` of the call site.
  yield YamlParserEvent(
    kind: k,
    objAnchor: anchor,
    objTag: tag)
  anchor = nil
  tag = nil
2015-12-05 11:10:17 +00:00
2015-12-11 21:55:21 +00:00
template closeLevel(lvl: DocumentLevel) {.dirty.} =
  # Emits whatever event is needed to finish the nesting level `lvl`:
  #   * sequence levels      -> yamlEndSequence
  #   * map levels           -> yamlEndMap
  #   * a map key that never got its value -> error (aborts parsing)
  #   * a pending scalar     -> the cached scalar text
  #   * anything else        -> an empty scalar
  # Dirty template: uses `anchor`, `tag` and `scalarCache` of the call site
  # through yieldScalar, and `lex`/`parserLoop` through yieldError.
  case lvl.mode
  of mBlockSequenceItem, mFlowSequenceItem:
    yield YamlParserEvent(kind: yamlEndSequence)
  of mExplicitBlockMapValue, mImplicitBlockMapKey, mImplicitBlockMapValue,
     mFlowMapValue:
    yield YamlParserEvent(kind: yamlEndMap)
  of mExplicitBlockMapKey, mFlowMapKey:
    yieldError("Missing Map value!")
  of mScalar:
    # Same event and same anchor/tag reset as spelling it out by hand.
    yieldScalar(scalarCache)
  else:
    yieldScalar()
template leaveMoreIndentedLevels() {.dirty.} =
  # Closes nesting levels for as long as the innermost ancestor sits at or to
  # the right of the lexer's current column. An ancestor is compared by its
  # indicator column, or by its indentation column when it has no indicator
  # (indicatorColumn == -1). After stepping back into an implicit block map
  # that was waiting for a value, the map is switched to expect a key again.
  # Dirty template: uses `ancestry`, `level` and `lex` of the call site.
  while ancestry.len > 0:
    let enclosing = ancestry[ancestry.high]
    let reachesColumn =
      enclosing.indicatorColumn >= lex.column or
      (enclosing.indicatorColumn == -1 and
       enclosing.indentationColumn >= lex.column)
    if not reachesColumn:
      break
    closeLevel(level)
    level = ancestry.pop()
    if level.mode == mImplicitBlockMapValue:
      level.mode = mImplicitBlockMapKey
2015-12-11 21:55:21 +00:00
template closeAllLevels() {.dirty.} =
  # Closes the current level and then every ancestor, innermost first,
  # leaving `ancestry` empty and `level` set to the outermost level.
  # Dirty template: uses `ancestry` and `level` of the call site.
  closeLevel(level)
  while ancestry.len > 0:
    level = ancestry.pop()
    closeLevel(level)
template handleBlockIndicator(expected, next: DocumentLevelMode,
                              entering: YamlParserEventKind) {.dirty.} =
  # Handles a block indicator token (`-`, `?` or `:` at line start).
  #
  #   expected  mode the current level must have if the indicator continues
  #             an existing collection at the same column
  #   next      mode the level switches to once the indicator is accepted
  #   entering  event to emit when the indicator opens a new collection;
  #             yamlError means the indicator may not open one
  #
  # On success the (updated) level is pushed onto `ancestry` and a fresh
  # unknown level is started for the node that follows the indicator.
  # Every yieldError below leaves `parserLoop`, so the shared tail is only
  # reached on the two success paths.
  # Dirty template: uses `ancestry`, `level`, `lex` and `token`.
  leaveMoreIndentedLevels()
  if level.indicatorColumn == lex.column:
    # Continuing a collection that already has an indicator at this column.
    if level.mode != expected:
      yieldError("Invalid token after " & $level.mode)
    level.mode = next
  elif level.mode != mUnknown:
    yieldError("Invalid indentation")
  elif entering == yamlError:
    yieldError("Unexpected token: " & $token)
  else:
    # First indicator of a new collection.
    level.mode = next
    level.indicatorColumn = lex.column
    yield YamlParserEvent(kind: entering)
  ancestry.add(level)
  level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
                        indentationColumn: -1)
iterator events*(input: Stream): YamlParserEvent {.closure.} =
  ## Parses the YAML character stream `input` and yields a flat sequence of
  ## parser events (document/map/sequence start and end, scalars, warnings
  ## and errors).
  ##
  ## The parser is a state machine over the tokens produced by the lexer.
  ## `level` describes the node currently being parsed and `ancestry` is the
  ## stack of its enclosing nodes. After a ``yamlError`` event has been
  ## yielded, parsing stops and the iterator finishes.
  var
    # parsing state
    lex: YamlLexer
    state = ylInitial
    # document state
    foundYamlDirective = false
    tagShorthands = initTable[string, string]()
    # object tree state
    ancestry = newSeq[DocumentLevel]()
    level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
                          indentationColumn: -1)
    # cached values
    tag: string = nil
    anchor: string = nil
    scalarCache: string = nil
    scalarIndentation: int
  lex.open(input)
  var nextToken = tokens
  var token = nextToken(lex)
  block parserLoop:
    # A branch that ends in `continue` re-processes the current token in the
    # state it has just switched to; all other branches fall through to the
    # token fetch at the bottom of the loop. yieldError leaves parserLoop.
    while not finished(nextToken):
      case state
      of ylInitial:
        case token
        of yamlYamlDirective:
          if foundYamlDirective:
            yieldError("Duplicate %YAML directive")
          var
            warn = false
            actualVersion = ""
          for version in [1, 2]:
            token = nextToken(lex)
            if finished(nextToken):
              yieldError("Missing or badly formatted YAML version")
            if token != yamlVersionPart:
              yieldError("Missing or badly formatted YAML version")
            if parseInt(lex.content) != version:
              warn = true
            if actualVersion.len > 0: actualVersion &= "."
            # Report the version found in the input, not the expected one.
            actualVersion &= lex.content
          if warn:
            yieldWarning("Unsupported version: " & actualVersion &
                         ", trying to parse anyway")
          foundYamlDirective = true
        of yamlTagDirective:
          token = nextToken(lex)
          if finished(nextToken):
            yieldError("Incomplete %TAG directive")
          if token != yamlTagHandle:
            yieldError("Invalid token (expected tag handle)")
          let tagHandle = lex.content
          token = nextToken(lex)
          if finished(nextToken):
            yieldError("Incomplete %TAG directive")
          if token != yamlTagURI:
            yieldError("Invalid token (expected tag URI)")
          tagShorthands[tagHandle] = lex.content
        of yamlUnknownDirective:
          yieldWarning("Unknown directive: " & lex.content)
          state = ylSkipDirective
        of yamlComment:
          discard
        of yamlDirectivesEnd:
          yield YamlParserEvent(kind: yamlStartDocument)
          level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
                                indentationColumn: -1)
          state = ylBlockLineStart
        of yamlDocumentEnd, yamlStreamEnd:
          yield YamlParserEvent(kind: yamlStartDocument)
          yield YamlParserEvent(kind: yamlEndDocument)
        else:
          # Any other token implicitly starts a document.
          yield YamlParserEvent(kind: yamlStartDocument)
          state = ylBlockLineStart
          continue
      of ylSkipDirective:
        if token notin [yamlUnknownDirectiveParam, yamlTagHandle,
                        yamlTagURI, yamlVersionPart, yamlComment]:
          state = ylInitial
          continue
      of ylBlockLineStart:
        case token
        of yamlLineStart:
          discard
        of yamlDash:
          handleBlockIndicator(mBlockSequenceItem, mBlockSequenceItem,
                               yamlStartSequence)
        of yamlQuestionmark:
          handleBlockIndicator(mExplicitBlockMapValue,
                               mExplicitBlockMapKey, yamlStartMap)
        of yamlColon:
          handleBlockIndicator(mExplicitBlockMapKey,
                               mExplicitBlockMapValue, yamlError)
        of yamlTagHandle:
          let handle = lex.content
          if tagShorthands.hasKey(handle):
            token = nextToken(lex)
            if finished(nextToken):
              yieldError("Missing tag suffix")
            if token != yamlTagSuffix:
              yieldError("Missing tag suffix")
            tag = tagShorthands[handle] & lex.content
            state = ylBlockAfterTag
          else:
            yieldError("Unknown tag shorthand: " & handle)
        of yamlVerbatimTag:
          tag = lex.content
        of yamlAnchor:
          anchor = lex.content
          state = ylBlockAfterAnchor
        of yamlScalarPart:
          leaveMoreIndentedLevels()
          case level.mode
          of mUnknown:
            level.mode = mScalar
            scalarCache = lex.content
            scalarIndentation = lex.column
          of mImplicitBlockMapKey:
            scalarCache = lex.content
            scalarIndentation = lex.column
          of mImplicitBlockMapValue:
            ancestry.add(level)
            scalarCache = lex.content
            scalarIndentation = lex.column
            level = DocumentLevel(
              mode: mScalar, indicatorColumn: -1,
              indentationColumn:
                ancestry[ancestry.high].indentationColumn + 1)
          else:
            yieldError("Unexpected scalar")
          state = ylBlockAfterScalar
        of lexer.yamlScalar:
          leaveMoreIndentedLevels()
          case level.mode
          of mUnknown, mImplicitBlockMapKey:
            scalarCache = lex.content
            scalarIndentation = lex.column
            state = ylBlockAfterScalar
          else:
            yieldError("Unexpected scalar")
        of yamlStreamEnd:
          closeAllLevels()
          yield YamlParserEvent(kind: yamlEndDocument)
          break
        of yamlDocumentEnd:
          closeAllLevels()
          yield YamlParserEvent(kind: yamlEndDocument)
          state = ylInitial
        of yamlOpeningBrace:
          state = ylFlow
          continue
        of yamlOpeningBracket:
          state = ylFlow
          continue
        else:
          yieldError("[block line start] Unexpected token: " & $token)
      of ylBlockMultilineScalar:
        case token
        of yamlScalarPart:
          leaveMoreIndentedLevels()
          if level.mode != mScalar:
            # The scalar was closed by the dedent; treat this token as the
            # start of a new node.
            state = ylBlockLineStart
            continue
          scalarCache &= " " & lex.content
          state = ylBlockLineEnd
        of yamlLineStart:
          discard
        of yamlColon, yamlDash, yamlQuestionMark:
          leaveMoreIndentedLevels()
          if level.mode != mScalar:
            state = ylBlockLineStart
            continue
          yieldError("[multiline scalar ?:-] Unexpected token: " & $token)
        of yamlDocumentEnd, yamlStreamEnd, yamlDirectivesEnd:
          # All three finish the document. ylExpectingDocumentEnd emits the
          # yamlEndDocument event; for `---` it then hands the token on to
          # ylInitial, which starts the next document.
          closeAllLevels()
          scalarCache = nil
          state = ylExpectingDocumentEnd
          continue
        else:
          yieldError("[multiline scalar] Unexpected token: " & $token)
      of ylBlockAfterScalar:
        case token
        of yamlColon:
          assert level.mode in [mUnknown, mImplicitBlockMapKey, mScalar]
          if level.mode in [mUnknown, mScalar]:
            # The scalar turns out to be the first key of a new implicit map.
            level.indentationColumn = scalarIndentation
            yieldStart(yamlStartMap)
          level.mode = mImplicitBlockMapValue
          ancestry.add(level)
          level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
                                indentationColumn: -1)
          yield YamlParserEvent(kind: yamlScalar,
                                scalarAnchor: anchor,
                                scalarTag: tag,
                                scalarContent: scalarCache)
          scalarCache = nil
          state = ylBlockAfterColon
        of yamlLineStart:
          if level.mode == mImplicitBlockMapKey:
            yieldError("Missing colon after implicit map key")
          if level.mode != mScalar:
            yield YamlParserEvent(kind: yamlScalar,
                                  scalarAnchor: anchor,
                                  scalarTag: tag,
                                  scalarContent: scalarCache)
            scalarCache = nil
            if ancestry.len > 0:
              level = ancestry.pop()
            else:
              state = ylExpectingDocumentEnd
          else:
            state = ylBlockMultilineScalar
        of yamlStreamEnd:
          yield YamlParserEvent(kind: yamlScalar,
                                scalarAnchor: anchor,
                                scalarTag: tag,
                                scalarContent: scalarCache)
          scalarCache = nil
          if ancestry.len > 0:
            level = ancestry.pop()
            closeAllLevels()
          yield YamlParserEvent(kind: yamlEndDocument)
          break
        else:
          yieldError("Unexpected token: " & $token)
      of ylBlockAfterTag:
        case token
        of yamlAnchor:
          anchor = lex.content
          state = ylBlockAfterAnchor
        of lexer.yamlScalar:
          state = ylBlockLineStart
          continue
        of yamlLineStart:
          state = ylBlockLineStart
        of yamlOpeningBracket, yamlOpeningBrace:
          state = ylFlow
          continue
        else:
          yieldError("Unexpected token: " & $token)
      of ylBlockAfterAnchor:
        case token
        of lexer.yamlScalar:
          # Keep the anchor that was read before; lex.content now holds the
          # scalar's text, which ylBlockLineStart picks up.
          state = ylBlockLineStart
          continue
        of yamlLineStart:
          state = ylBlockLineStart
        of yamlOpeningBracket, yamlOpeningBrace:
          state = ylFlow
          continue
        else:
          yieldError("Unexpected token: " & $token)
      of ylBlockAfterColon:
        case token
        of lexer.yamlScalar:
          yieldScalar(lex.content)
          level = ancestry.pop()
          assert level.mode == mImplicitBlockMapValue
          level.mode = mImplicitBlockMapKey
          state = ylBlockLineEnd
        of yamlScalarPart:
          level.mode = mScalar
          scalarCache = lex.content
          if ancestry[ancestry.high].indicatorColumn != -1:
            level.indentationColumn =
              ancestry[ancestry.high].indicatorColumn + 1
          else:
            level.indentationColumn =
              ancestry[ancestry.high].indentationColumn + 1
          state = ylBlockLineEnd
        of yamlLineStart:
          state = ylBlockLineStart
        of yamlStreamEnd:
          closeAllLevels()
          yield YamlParserEvent(kind: yamlEndDocument)
          break
        of yamlOpeningBracket, yamlOpeningBrace:
          state = ylFlow
          continue
        else:
          yieldError("Unexpected token (expected scalar or line end): " &
                     $token)
      of ylBlockLineEnd:
        case token
        of yamlLineStart:
          state = if level.mode == mScalar: ylBlockMultilineScalar else:
              ylBlockLineStart
        of yamlStreamEnd:
          closeAllLevels()
          yield YamlParserEvent(kind: yamlEndDocument)
          break
        else:
          yieldError("Unexpected token (expected line end):" & $token)
      of ylFlow:
        case token
        of yamlLineStart:
          discard
        of lexer.yamlScalar, yamlScalarPart:
          yieldScalar(lex.content)
          level = ancestry.pop()
          state = ylFlowAfterObject
        of yamlColon:
          yieldScalar()
          level = ancestry.pop()
          if level.mode == mFlowMapKey:
            level.mode = mFlowMapValue
            ancestry.add(level)
            level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
                                  indentationColumn: -1)
          else:
            yieldError(
              "Unexpected token (expected scalar, comma or " &
              " map end): " & $token)
        of yamlComma:
          yieldScalar()
          level = ancestry.pop()
          case level.mode
          of mFlowMapValue:
            level.mode = mFlowMapKey
            ancestry.add(level)
            level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
                                  indentationColumn: -1)
          of mFlowSequenceItem:
            yieldScalar()
          else:
            yieldError("Internal error! Please report this bug.")
        of yamlOpeningBrace:
          if level.mode != mUnknown:
            yieldError("Unexpected token")
          level.mode = mFlowMapKey
          yieldStart(yamlStartMap)
          ancestry.add(level)
          level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
                                indentationColumn: -1)
        of yamlOpeningBracket:
          if level.mode != mUnknown:
            yieldError("Unexpected token")
          level.mode = mFlowSequenceItem
          yieldStart(yamlStartSequence)
          ancestry.add(level)
          level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
                                indentationColumn: -1)
        of yamlClosingBrace:
          if level.mode == mUnknown:
            yieldScalar()
            level = ancestry.pop()
          if level.mode != mFlowMapValue:
            yieldError("Unexpected token")
          yield YamlParserEvent(kind: yamlEndMap)
          if ancestry.len > 0:
            level = ancestry.pop()
            case level.mode
            of mFlowMapKey, mFlowMapValue, mFlowSequenceItem:
              state = ylFlowAfterObject
            else:
              state = ylBlockLineEnd
          else:
            state = ylExpectingDocumentEnd
        of yamlClosingBracket:
          if level.mode == mUnknown:
            yieldScalar()
            level = ancestry.pop()
          if level.mode != mFlowSequenceItem:
            yieldError("Unexpected token: " & $token)
          else:
            yield YamlParserEvent(kind: yamlEndSequence)
            if ancestry.len > 0:
              level = ancestry.pop()
              case level.mode
              of mFlowMapKey, mFlowMapValue, mFlowSequenceItem:
                state = ylFlowAfterObject
              else:
                state = ylBlockLineEnd
            else:
              state = ylExpectingDocumentEnd
        else:
          yieldError("Unexpected token: " & $token)
      of ylFlowAfterObject:
        case token
        of yamlLineStart:
          discard
        of yamlColon:
          if level.mode != mFlowMapKey:
            yieldError("Unexpected token: " & $token)
          else:
            level.mode = mFlowMapValue
            ancestry.add(level)
            level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
                                  indentationColumn: -1)
            state = ylFlow
        of yamlComma:
          case level.mode
          of mFlowSequenceItem:
            ancestry.add(level)
            level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
                                  indentationColumn: -1)
            state = ylFlow
          of mFlowMapValue:
            level.mode = mFlowMapKey
            ancestry.add(level)
            level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
                                  indentationColumn: -1)
            state = ylFlow
          else:
            yieldError("Unexpected token: " & $token)
        of yamlClosingBrace:
          if level.mode != mFlowMapValue:
            yieldError("Unexpected token: " & $token)
          else:
            yield YamlParserEvent(kind: yamlEndMap)
            if ancestry.len > 0:
              level = ancestry.pop()
              case level.mode
              of mFlowMapKey, mFlowMapValue, mFlowSequenceItem:
                # The map just closed is a finished object inside its parent
                # flow collection; ylFlow would expect `level` to be a fresh
                # child and pop one ancestor too many.
                state = ylFlowAfterObject
              else:
                state = ylBlockLineEnd
            else:
              state = ylExpectingDocumentEnd
        of yamlClosingBracket:
          if level.mode != mFlowSequenceItem:
            yieldError("Unexpected token: " & $token)
          else:
            yield YamlParserEvent(kind: yamlEndSequence)
            if ancestry.len > 0:
              level = ancestry.pop()
              case level.mode
              of mFlowMapKey, mFlowMapValue, mFlowSequenceItem:
                # Same reasoning as for the closing brace above.
                state = ylFlowAfterObject
              else:
                state = ylBlockLineEnd
            else:
              state = ylExpectingDocumentEnd
        else:
          yieldError("Unexpected token: " & $token)
      of ylExpectingDocumentEnd:
        case token
        of yamlComment, yamlLineStart:
          discard
        of yamlStreamEnd, yamlDocumentEnd:
          yield YamlParserEvent(kind: yamlEndDocument)
          state = ylInitial
        of yamlDirectivesEnd:
          # `---` ends this document and must also start the next one, so the
          # token is handed to ylInitial unchanged.
          yield YamlParserEvent(kind: yamlEndDocument)
          state = ylInitial
          continue
        else:
          yieldError("Unexpected token (expected document end): " &
                     $token)
      token = nextToken(lex)