Worked on tags

* Introduced YamlSequentialParser object that stores
  all known tag URIs
* Added procs for registering and querying tag URIs
* Return non-specific tags ("!" or "?") instead of nil when
  the source specifies no tag
This commit is contained in:
Felix Krause 2015-12-21 21:40:27 +01:00
parent edec7ece37
commit 70597105cb
3 changed files with 153 additions and 59 deletions

View File

@ -448,7 +448,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
else: else:
my.content.add(c) my.content.add(c)
of '[', ']', '{', '}': of '[', ']', '{', '}':
yieldToken(yamlScalar) yieldToken(yamlScalarPart)
state = ylInitialInLine state = ylInitialInLine
continue continue
else: else:

View File

@ -28,9 +28,10 @@ type
YamlParserState = enum YamlParserState = enum
ylInitial, ylSkipDirective, ylBlockLineStart, ylBlockAfterTag, ylInitial, ylSkipDirective, ylBlockLineStart, ylBlockAfterTag,
ylBlockAfterAnchor, ylBlockAfterScalar, ylBlockAfterColon, ylBlockAfterAnchor, ylBlockAfterAnchorAndTag, ylBlockAfterScalar,
ylBlockMultilineScalar, ylBlockLineEnd, ylBlockScalarHeader, ylBlockAfterColon, ylBlockMultilineScalar, ylBlockLineEnd,
ylBlockScalar, ylFlow, ylFlowAfterObject, ylExpectingDocumentEnd ylBlockScalarHeader, ylBlockScalar, ylFlow, ylFlowAfterObject,
ylExpectingDocumentEnd
DocumentLevelMode = enum DocumentLevelMode = enum
mBlockSequenceItem, mFlowSequenceItem, mExplicitBlockMapKey, mBlockSequenceItem, mFlowSequenceItem, mExplicitBlockMapKey,
@ -48,6 +49,50 @@ type
BlockScalarStyle = enum BlockScalarStyle = enum
bsLiteral, bsFolded bsLiteral, bsFolded
TagId = distinct int
YamlSequentialParser* = object
tags: OrderedTable[string, TagId]
const
  # Ids reserved for the two non-specific tags. `initParser` stores them in
  # the tag table under the keys "!" and "?" respectively.
  tagNonSpecificEmark*: TagId = TagId(0) # "!" non-specific tag
  tagNonSpecificQmark*: TagId = TagId(1) # "?" non-specific tag
# interface
#
# Forward declarations of the exported procs. Apart from the borrowed
# TagId comparison, their bodies follow in the implementation section.

# Equality of two parser events.
proc `==`*(left: YamlParserEvent, right: YamlParserEvent): bool
# Equality of two tag ids, borrowed from the base type of the distinct TagId.
proc `==`*(left, right: TagId): bool {.borrow.}
# Creates a parser value with its tag table initialized.
proc initParser*(): YamlSequentialParser
# iterators cannot be pre-declared.
#
# iterator events*(parser: YamlSequentialParser,
# input: Stream): YamlParserEvent
# Reverse lookup: the URI stored for a tag id.
proc uri*(parser: YamlSequentialParser, id: TagId): string
# Looks up the id of a URI, adding the URI to the parser if it is unknown.
proc registerUri*(parser: var YamlSequentialParser, uri: string): TagId
# implementation
proc initParser*(): YamlSequentialParser =
  ## Creates a parser whose tag table already contains the two non-specific
  ## tags: "!" mapped to `tagNonSpecificEmark` and "?" mapped to
  ## `tagNonSpecificQmark`, inserted in that order.
  result = YamlSequentialParser(tags: initOrderedTable[string, TagId]())
  for entry in [("!", tagNonSpecificEmark), ("?", tagNonSpecificQmark)]:
    result.tags[entry[0]] = entry[1]
proc uri*(parser: YamlSequentialParser, id: TagId): string =
  ## Looks up the URI stored under `id` by scanning the tag table in
  ## insertion order and stopping at the first match.
  ##
  ## Yields nil when no entry of the table carries `id`.
  result = nil
  for knownUri, knownId in parser.tags.pairs:
    if knownId == id:
      result = knownUri
      break
proc registerUri*(parser: var YamlSequentialParser, uri: string): TagId =
  ## Returns the id stored for `uri`, registering the URI first if needed.
  ##
  ## A URI that is not yet in the tag table is inserted with the next free
  ## id, which is the number of entries the table holds before insertion.
  ## For a URI that is already present the table is left untouched and the
  ## id stored for it is returned.
  # Convert with `TagId(...)` instead of `cast`: a type conversion is the
  # regular way to produce a value of a distinct type, whereas `cast` is an
  # unchecked reinterpretation of the bit pattern.
  result = TagId(parser.tags.len)
  if parser.tags.hasKeyOrPut(uri, result):
    # Already known: discard the candidate id and report the stored one.
    result = parser.tags[uri]
proc `==`*(left: YamlParserEvent, right: YamlParserEvent): bool = proc `==`*(left: YamlParserEvent, right: YamlParserEvent): bool =
if left.kind != right.kind: if left.kind != right.kind:
return false return false
@ -76,15 +121,17 @@ template yieldError(d: string) {.dirty.} =
line: lex.line, column: lex.column) line: lex.line, column: lex.column)
break parserLoop break parserLoop
template yieldScalar(content: string = "") {.dirty.} = template yieldScalar(content: string = "", quoted: bool = false) {.dirty.} =
let retTag = if isNil(tag): if quoted: "!" else: "?" else: tag
yield YamlParserEvent(kind: yamlScalar, yield YamlParserEvent(kind: yamlScalar,
scalarAnchor: anchor, scalarTag: tag, scalarAnchor: anchor, scalarTag: retTag,
scalarContent: content) scalarContent: content)
anchor = nil anchor = nil
tag = nil tag = nil
template yieldStart(k: YamlParserEventKind) {.dirty.} = template yieldStart(k: YamlParserEventKind) {.dirty.} =
yield YamlParserEvent(kind: k, objAnchor: anchor, objTag: tag) let retTag = if isNil(tag): "?" else: tag
yield YamlParserEvent(kind: k, objAnchor: anchor, objTag: retTag)
anchor = nil anchor = nil
tag = nil tag = nil
@ -98,8 +145,11 @@ template closeLevel(lvl: DocumentLevel) {.dirty.} =
of mBlockSequenceItem, mFlowSequenceItem: of mBlockSequenceItem, mFlowSequenceItem:
yield YamlParserEvent(kind: yamlEndSequence) yield YamlParserEvent(kind: yamlEndSequence)
of mScalar: of mScalar:
let retTag = if isNil(tag): if scalarCacheIsQuoted: "!" else: "?" else:
tag
yield YamlParserEvent(kind: yamlScalar, scalarAnchor: anchor, yield YamlParserEvent(kind: yamlScalar, scalarAnchor: anchor,
scalarTag: tag, scalarContent: scalarCache) scalarTag: retTag, scalarContent: scalarCache)
anchor = nil anchor = nil
tag = nil tag = nil
else: else:
@ -142,12 +192,33 @@ template handleBlockIndicator(expected, next: DocumentLevelMode,
else: else:
level.mode = next level.mode = next
level.indicatorColumn = lex.column level.indicatorColumn = lex.column
yield YamlParserEvent(kind: entering) yieldStart(entering)
ancestry.add(level) ancestry.add(level)
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1, level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1) indentationColumn: -1)
iterator events*(input: Stream): YamlParserEvent {.closure.} = template startPlainScalar() {.dirty.} =
level.mode = mScalar
scalarCache = lex.content
state = ylBlockAfterScalar
template handleTagHandle() {.dirty.} =
  ## Resolves the tag handle currently held in `lex.content` into a full tag
  ## and stores it in `tag`.
  ##
  ## Dirty template: it reads and writes `lex`, `token`, `tag`, `level` and
  ## `tagShorthands` of the expansion site, and uses `continue` and
  ## `yieldError`, so it has to be expanded inside the parser loop.
  let handle = lex.content
  if not tagShorthands.hasKey(handle):
    yieldError("Unknown tag shorthand: " & handle)
  else:
    # The suffix has to be the very next token delivered by the lexer.
    token = nextToken(lex)
    # `or` short-circuits, so `token` is only inspected when the lexer
    # iterator has not finished.
    if finished(nextToken) or token != yamlTagSuffix:
      yieldError("Missing tag suffix")
      continue
    # Full tag = prefix registered for the handle & the suffix just lexed.
    tag = tagShorthands[handle] & lex.content
    level.indentationColumn = lex.column
iterator events*(parser: YamlSequentialParser,
input: Stream): YamlParserEvent {.closure.} =
var var
# parsing state # parsing state
lex: YamlLexer lex: YamlLexer
@ -173,6 +244,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
anchor: string = nil anchor: string = nil
scalarCache: string = nil scalarCache: string = nil
scalarIndentation: int scalarIndentation: int
scalarCacheIsQuoted: bool = false
lex.open(input) lex.open(input)
@ -259,37 +331,30 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
scalarCache = "" scalarCache = ""
level.mode = mScalar level.mode = mScalar
of yamlTagHandle: of yamlTagHandle:
let handle = lex.content handleTagHandle()
if tagShorthands.hasKey(handle): state = ylBlockAfterTag
token = nextToken(lex)
if finished(nextToken):
yieldError("Missing tag suffix")
continue
if token != yamlTagSuffix:
yieldError("Missing tag suffix")
continue
tag = tagShorthands[handle] & lex.content
state = ylBlockAfterTag
else:
yieldError("Unknown tag shorthand: " & handle)
of yamlVerbatimTag: of yamlVerbatimTag:
tag = lex.content tag = lex.content
state = ylBlockAfterTag
level.indentationColumn = lex.column
of yamlAnchor: of yamlAnchor:
anchor = lex.content anchor = lex.content
level.indentationColumn = lex.column
state = ylBlockAfterAnchor state = ylBlockAfterAnchor
of yamlScalarPart: of yamlScalarPart:
leaveMoreIndentedLevels() leaveMoreIndentedLevels()
case level.mode case level.mode
of mUnknown: of mUnknown:
level.mode = mScalar startPlainScalar()
scalarCache = lex.content level.indentationColumn = lex.column
scalarIndentation = lex.column
of mImplicitBlockMapKey: of mImplicitBlockMapKey:
scalarCache = lex.content scalarCache = lex.content
scalarCacheIsQuoted = false
scalarIndentation = lex.column scalarIndentation = lex.column
of mImplicitBlockMapValue: of mImplicitBlockMapValue:
ancestry.add(level) ancestry.add(level)
scalarCache = lex.content scalarCache = lex.content
scalarCacheIsQuoted = false
scalarIndentation = lex.column scalarIndentation = lex.column
level = DocumentLevel(mode: mScalar, indicatorColumn: -1, level = DocumentLevel(mode: mScalar, indicatorColumn: -1,
indentationColumn: indentationColumn:
@ -302,6 +367,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
case level.mode case level.mode
of mUnknown, mImplicitBlockMapKey: of mUnknown, mImplicitBlockMapKey:
scalarCache = lex.content scalarCache = lex.content
scalarCacheIsQuoted = true
scalarIndentation = lex.column scalarIndentation = lex.column
state = ylBlockAfterScalar state = ylBlockAfterScalar
else: else:
@ -356,25 +422,21 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
assert level.mode in [mUnknown, mImplicitBlockMapKey, mScalar] assert level.mode in [mUnknown, mImplicitBlockMapKey, mScalar]
if level.mode in [mUnknown, mScalar]: if level.mode in [mUnknown, mScalar]:
level.indentationColumn = scalarIndentation level.indentationColumn = scalarIndentation
yieldStart(yamlStartMap) # tags and anchors are for key scalar, not for map.
yield YamlParserEvent(kind: yamlStartMap,
objAnchor: nil, objTag: "?")
level.mode = mImplicitBlockMapValue level.mode = mImplicitBlockMapValue
ancestry.add(level) ancestry.add(level)
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1, level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1) indentationColumn: -1)
yield YamlParserEvent(kind: yamlScalar, yieldScalar(scalarCache, scalarCacheIsQuoted)
scalarAnchor: anchor,
scalarTag: tag,
scalarContent: scalarCache)
scalarCache = nil scalarCache = nil
state = ylBlockAfterColon state = ylBlockAfterColon
of yamlLineStart: of yamlLineStart:
if level.mode == mImplicitBlockMapKey: if level.mode == mImplicitBlockMapKey:
yieldError("Missing colon after implicit map key") yieldError("Missing colon after implicit map key")
if level.mode != mScalar: if level.mode != mScalar:
yield YamlParserEvent(kind: yamlScalar, yieldScalar(scalarCache, scalarCacheIsQuoted)
scalarAnchor: anchor,
scalarTag: tag,
scalarContent: scalarCache)
scalarCache = nil scalarCache = nil
if ancestry.len > 0: if ancestry.len > 0:
level = ancestry.pop() level = ancestry.pop()
@ -383,10 +445,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
else: else:
state = ylBlockMultilineScalar state = ylBlockMultilineScalar
of yamlStreamEnd: of yamlStreamEnd:
yield YamlParserEvent(kind: yamlScalar, yieldScalar(scalarCache, scalarCacheIsQuoted)
scalarAnchor: anchor,
scalarTag: tag,
scalarContent: scalarCache)
scalarCache = nil scalarCache = nil
if ancestry.len > 0: if ancestry.len > 0:
level = ancestry.pop() level = ancestry.pop()
@ -399,10 +458,12 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
case token case token
of yamlAnchor: of yamlAnchor:
anchor = lex.content anchor = lex.content
state = ylBlockAfterAnchor state = ylBlockAfterAnchorAndTag
of lexer.yamlScalar: of lexer.yamlScalar:
state = ylBlockLineStart state = ylBlockLineStart
continue continue
of yamlScalarPart:
startPlainScalar()
of yamlLineStart: of yamlLineStart:
state = ylBlockLineStart state = ylBlockLineStart
of yamlOpeningBracket, yamlOpeningBrace: of yamlOpeningBracket, yamlOpeningBrace:
@ -413,11 +474,33 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
of ylBlockAfterAnchor: of ylBlockAfterAnchor:
case token case token
of lexer.yamlScalar: of lexer.yamlScalar:
anchor = lex.content
state = ylBlockLineStart state = ylBlockLineStart
continue continue
of lexer.yamlScalarPart:
startPlainScalar()
of yamlLineStart: of yamlLineStart:
discard
of yamlOpeningBracket, yamlOpeningBrace:
state = ylFlow
continue
of yamlTagHandle:
handleTagHandle()
state = ylBlockAfterTag
of yamlVerbatimTag:
tag = lex.content
state = ylBlockAfterTag
level.indentationColumn = lex.column
else:
yieldError("Unexpected token: " & $token)
of ylBlockAfterAnchorAndTag:
case token
of lexer.yamlScalar:
state = ylBlockLineStart state = ylBlockLineStart
continue
of yamlScalarPart:
startPlainScalar()
of yamlLineStart:
discard
of yamlOpeningBracket, yamlOpeningBrace: of yamlOpeningBracket, yamlOpeningBrace:
state = ylFlow state = ylFlow
continue continue
@ -426,21 +509,13 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
of ylBlockAfterColon: of ylBlockAfterColon:
case token case token
of lexer.yamlScalar: of lexer.yamlScalar:
yieldScalar(lex.content) yieldScalar(lex.content, true)
level = ancestry.pop() level = ancestry.pop()
assert level.mode == mImplicitBlockMapValue assert level.mode == mImplicitBlockMapValue
level.mode = mImplicitBlockMapKey level.mode = mImplicitBlockMapKey
state = ylBlockLineEnd state = ylBlockLineEnd
of yamlScalarPart: of yamlScalarPart:
level.mode = mScalar startPlainScalar()
scalarCache = lex.content
if ancestry[ancestry.high].indicatorColumn != -1:
level.indentationColumn =
ancestry[ancestry.high].indicatorColumn + 1
else:
level.indentationColumn =
ancestry[ancestry.high].indentationColumn + 1
state = ylBlockLineEnd
of yamlLineStart: of yamlLineStart:
state = ylBlockLineStart state = ylBlockLineStart
of yamlStreamEnd: of yamlStreamEnd:
@ -554,7 +629,7 @@ iterator events*(input: Stream): YamlParserEvent {.closure.} =
of yamlLineStart: of yamlLineStart:
discard discard
of lexer.yamlScalar, yamlScalarPart: of lexer.yamlScalar, yamlScalarPart:
yieldScalar(lex.content) yieldScalar(lex.content, token == lexer.yamlScalar)
level = ancestry.pop() level = ancestry.pop()
state = ylFlowAfterObject state = ylFlowAfterObject
of yamlColon: of yamlColon:

View File

@ -11,15 +11,15 @@ proc endDoc(): YamlParserEvent =
new(result) new(result)
result.kind = yamlEndDocument result.kind = yamlEndDocument
proc scalar(content: string, proc scalar(content: string, tag: string = "?",
anchor: string = nil, tag: string = nil): YamlParserEvent = anchor: string = nil): YamlParserEvent =
new(result) new(result)
result.kind = yamlScalar result.kind = yamlScalar
result.scalarAnchor = anchor result.scalarAnchor = anchor
result.scalarTag = tag result.scalarTag = tag
result.scalarContent = content result.scalarContent = content
proc startSequence(anchor: string = nil, tag: string = nil): YamlParserEvent = proc startSequence(anchor: string = nil, tag: string = "?"): YamlParserEvent =
new(result) new(result)
result.kind = yamlStartSequence result.kind = yamlStartSequence
result.objAnchor = anchor result.objAnchor = anchor
@ -29,7 +29,7 @@ proc endSequence(): YamlParserEvent =
new(result) new(result)
result.kind = yamlEndSequence result.kind = yamlEndSequence
proc startMap(anchor: string = nil, tag: string = nil): YamlParserEvent = proc startMap(anchor: string = nil, tag: string = "?"): YamlParserEvent =
new(result) new(result)
result.kind = yamlStartMap result.kind = yamlStartMap
result.objAnchor = anchor result.objAnchor = anchor
@ -51,8 +51,17 @@ proc printDifference(expected, actual: YamlParserEvent) =
case expected.kind case expected.kind
of yamlScalar: of yamlScalar:
if expected.scalarTag != actual.scalarTag: if expected.scalarTag != actual.scalarTag:
echo "[scalar] expected tag " & expected.scalarTag & ", got " & if isNil(expected.scalarTag):
actual.scalarTag echo "[\"" & actual.scalarContent &
"\".tag] expected <nil>, got " & actual.scalarTag
elif isNil(actual.scalarTag):
echo "[\"" & actual.scalarContent &
"\".tag] expected " & expected.scalarTag &
", got <nil>"
else:
echo "[\"" & actual.scalarContent &
"\".tag] expected tag " & expected.scalarTag &
", got " & actual.scalarTag
elif expected.scalarAnchor != actual.scalarAnchor: elif expected.scalarAnchor != actual.scalarAnchor:
echo "[scalar] expected anchor " & expected.scalarAnchor & echo "[scalar] expected anchor " & expected.scalarAnchor &
", got " & actual.scalarAnchor ", got " & actual.scalarAnchor
@ -73,14 +82,24 @@ proc printDifference(expected, actual: YamlParserEvent) =
break break
else: else:
echo "[scalar] Unknown difference" echo "[scalar] Unknown difference"
of yamlStartMap, yamlStartSequence:
if expected.objTag != actual.objTag:
if isNil(expected.objTag):
echo "[object.tag] expected <nil>, got " & actual.objTag
elif isNil(actual.objTag):
echo "[object.tag] expected " & expected.objTag &
", got <nil>"
else:
echo ""
else: else:
echo "Unknown difference in event kind " & $expected.kind echo "Unknown difference in event kind " & $expected.kind
template ensure(input: string, expected: varargs[YamlParserEvent]) {.dirty.} = template ensure(input: string, expected: varargs[YamlParserEvent]) {.dirty.} =
var var
i = 0 i = 0
parser = initParser()
for token in events(newStringStream(input)): for token in parser.events(newStringStream(input)):
if i >= expected.len: if i >= expected.len:
echo "received more tokens than expected (next token = ", echo "received more tokens than expected (next token = ",
token.kind, ")" token.kind, ")"