mirror of https://github.com/status-im/NimYAML.git
Started re-implementing parser

* Made TagLibrary a ref object.
* Started writing fastparse.nim, a parser aimed at delivering much faster parsing than the current implementation. It is currently only able to parse block content.
parent 94fd70a808
commit 55d5cfcbf9
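For context, this is roughly how the new entry point is meant to be driven once block parsing is complete; a minimal sketch, assuming the module layout of this commit (the input document and the echo are illustrative only):

import streams
import yaml

var tagLib = initTagLibrary()
# fastparse returns a YamlStream, a closure iterator over YamlStreamEvent.
var events = fastparse(tagLib, newStringStream("key: value"))
for event in events():
  echo event.kind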
@@ -69,4 +69,7 @@ proc endSeqEvent*(): YamlStreamEvent =
 proc scalarEvent*(content: string = "", tag: TagId = yTagQuestionMark,
                   anchor: AnchorId = yAnchorNone): YamlStreamEvent =
   result = YamlStreamEvent(kind: yamlScalar, scalarTag: tag,
                            scalarAnchor: anchor, scalarContent: content)
+
+proc aliasEvent*(anchor: AnchorId): YamlStreamEvent =
+  result = YamlStreamEvent(kind: yamlAlias, aliasTarget: anchor)
fastparse.nim (new file):

@@ -0,0 +1,901 @@
type
  FastParseState = enum
    fpInitial, fpBlockLineStart, fpBlockAfterScalar, fpBlockAfterPlainScalar,
    fpBlockObjectStart, fpBlockContinueScalar, fpExpectDocEnd

  FastParseLevelKind = enum
    fplUnknown, fplSequence, fplMapKey, fplMapValue, fplScalar

  FastParseLevel = object
    kind: FastParseLevelKind
    indentation: int

  LexedDirective = enum
    ldYaml, ldTag, ldUnknown

  LexedPossibleDirectivesEnd = enum
    lpdeDirectivesEnd, lpdeSequenceItem, lpdeScalarContent

  YamlContext = enum
    cFlowIn, cFlowOut, cFlowKey, cBlockKey, cBlockIn, cBlockOut

  FastLexer = object of BaseLexer
    tokenstart: int

const
  space = [' ', '\t']
  lineEnd = ['\x0A', '\c', EndOfFile]
  spaceOrLineEnd = [' ', '\t', '\x0A', '\c', EndOfFile]
  digits = '0'..'9'
  flowIndicators = ['[', ']', '{', '}', ',']

template debug(message: string) {.dirty.} =
  when defined(yamlDebug):
    try: styledWriteLine(stdout, fgBlue, message)
    except IOError: discard

template raiseError(message: string) {.dirty.} =
  var e = newException(YamlParserError, message)
  e.line = lexer.lineNumber
  e.column = lexer.tokenstart
  e.lineContent = lexer.getCurrentLine(false) &
      repeat(' ', lexer.getColNumber(lexer.bufpos)) & "^\n"
  raise e

template raiseError(message: string, col: int) {.dirty.} =
  var e = newException(YamlParserError, message)
  e.line = lexer.lineNumber
  e.column = col
  e.lineContent = lexer.getCurrentLine(false) &
      repeat(' ', lexer.getColNumber(lexer.bufpos)) & "^\n"
  raise e

template closeLevel() {.dirty.} =
  case level.kind
  of fplSequence:
    yield endSeqEvent()
  of fplMapKey:
    yield endMapEvent()
  of fplMapValue:
    yield scalarEvent("", tag, anchor)
    tag = yTagQuestionMark
    anchor = yAnchorNone
    yield endMapEvent()
  of fplScalar:
    applyObjectProperties()
    yield cachedScalar
  of fplUnknown:
    yield scalarEvent("")
  if ancestry.len > 0:
    level = ancestry.pop()

template handleLineEnd(insideDocument: bool) {.dirty.} =
  case lexer.buf[lexer.bufpos]
  of '\x0A':
    lexer.bufpos = lexer.handleLF(lexer.bufpos)
  of '\c':
    lexer.bufpos = lexer.handleCR(lexer.bufpos)
  of EndOfFile:
    when insideDocument:
      closeEverything()
    return
  else:
    discard

template handleObjectEnd() {.dirty.} =
  tag = yTagQuestionMark
  anchor = yAnchorNone
  case level.kind
  of fplMapKey:
    level.kind = fplMapValue
  of fplMapValue:
    level.kind = fplMapKey
  of fplSequence:
    discard
  of fplUnknown, fplScalar:
    raiseError("Internal error!")

template handleStartObject(k: YamlStreamEventKind) {.dirty.} =
  when k == yamlStartMap:
    yield startMapEvent(objectTag, objectAnchor)
    debug("started map at " & $lexer.tokenstart)
  else:
    yield startSeqEvent(objectTag, objectAnchor)
    debug("started sequence at " & $lexer.tokenstart)
  objectTag = yTagQuestionMark
  objectAnchor = yAnchorNone

template closeMoreIndentedLevels() {.dirty.} =
  while ancestry.len > 0:
    let parent = ancestry[ancestry.high]
    if parent.indentation >= indentation:
      debug("Closing because parent.indentation = " & $parent.indentation &
            ", but indentation = " & $indentation)
      closeLevel()
      handleObjectEnd()
    else:
      break

template closeEverything() {.dirty.} =
  indentation = 0
  closeMoreIndentedLevels()
  closeLevel()
  yield endDocEvent()

template handleStartBlockSequence() {.dirty.} =
  case level.kind
  of fplUnknown:
    level.kind = fplSequence
    handleStartObject(yamlStartSequence)
  of fplSequence:
    if level.indentation != indentation:
      raiseError("Invalid indentation of block sequence indicator",
                 lexer.bufpos)
  else:
    raiseError("Illegal sequence item in map")
  ancestry.add(level)
  lexer.skipWhitespace()
  indentation = lexer.getColNumber(lexer.bufpos)
  level = FastParseLevel(kind: fplUnknown, indentation: indentation)

template handleStartBlockScalar() {.dirty.} =
  case level.kind
  of fplUnknown, fplMapKey:
    discard
  of fplSequence:
    raiseError("Illegal token (expected '- ')")
  of fplMapValue, fplScalar:
    raiseError("Internal error!")

template propsToObjectProps() {.dirty.} =
  if objectTag == yTagQuestionMark:
    objectTag = tag
    tag = yTagQuestionMark
  elif tag != yTagQuestionMark:
    raiseError("Only one tag is allowed per node")
  if objectAnchor == yAnchorNone:
    objectAnchor = anchor
    anchor = yAnchorNone
  elif anchor != yAnchorNone:
    raiseError("Only one anchor is allowed per node")

template initDocValues() {.dirty.} =
  shorthands = initTable[string, string]()
  anchors = initTable[string, AnchorId]()
  shorthands["!"] = "!"
  shorthands["!!"] = "tag:yaml.org,2002:"
  nextAnchorId = 0.AnchorId
  level = FastParseLevel(kind: fplUnknown, indentation: -1)

template applyObjectProperties() {.dirty.} =
  if objectTag != yTagQuestionMark:
    if cachedScalar.scalarTag != yTagQuestionMark:
      raiseError("Only one tag is allowed per node")
    else:
      cachedScalar.scalarTag = objectTag
      objectTag = yTagQuestionMark
  if objectAnchor != yAnchorNone:
    if cachedScalar.scalarAnchor != yAnchorNone:
      raiseError("Only one anchor is allowed per node")
    else:
      cachedScalar.scalarAnchor = objectAnchor
      objectAnchor = yAnchorNone

template finishLine(lexer: FastLexer) =
  debug("lex: finishLine")
  while lexer.buf[lexer.bufpos] notin lineEnd:
    lexer.bufpos.inc()

template skipWhitespace(lexer: FastLexer) =
  debug("lex: skipWhitespace")
  while lexer.buf[lexer.bufpos] in space: lexer.bufpos.inc()

template skipIndentation(lexer: FastLexer) =
  debug("lex: skipIndentation")
  while lexer.buf[lexer.bufpos] == ' ': lexer.bufpos.inc()

template directiveName(lexer: FastLexer, directive: var LexedDirective) =
  debug("lex: directiveName")
  directive = ldUnknown
  lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
  lexer.bufpos.inc()
  if lexer.buf[lexer.bufpos] == 'Y':
    lexer.bufpos.inc()
    if lexer.buf[lexer.bufpos] == 'A':
      lexer.bufpos.inc()
      if lexer.buf[lexer.bufpos] == 'M':
        lexer.bufpos.inc()
        if lexer.buf[lexer.bufpos] == 'L':
          lexer.bufpos.inc()
          if lexer.buf[lexer.bufpos] in spaceOrLineEnd:
            directive = ldYaml
  elif lexer.buf[lexer.bufpos] == 'T':
    lexer.bufpos.inc()
    if lexer.buf[lexer.bufpos] == 'A':
      lexer.bufpos.inc()
      if lexer.buf[lexer.bufpos] == 'G':
        lexer.bufpos.inc()
        if lexer.buf[lexer.bufpos] in [' ', '\t', '\x0A', '\c', EndOfFile]:
          directive = ldTag
  while lexer.buf[lexer.bufpos] notin spaceOrLineEnd:
    lexer.bufpos.inc()

template yamlVersion(lexer: FastLexer, o: var string) =
  debug("lex: yamlVersion")
  while lexer.buf[lexer.bufpos] in space:
    lexer.bufpos.inc()
  var c = lexer.buf[lexer.bufpos]
  if c notin digits:
    raiseError("Invalid YAML version number")
  o.add(c)
  lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
  lexer.bufpos.inc()
  c = lexer.buf[lexer.bufpos]
  while c in digits:
    lexer.bufpos.inc()
    o.add(c)
    c = lexer.buf[lexer.bufpos]
  if lexer.buf[lexer.bufpos] != '.':
    raiseError("Invalid YAML version number")
  o.add('.')
  lexer.bufpos.inc()
  if lexer.buf[lexer.bufpos] notin digits:
    raiseError("Invalid YAML version number")
  # collect the minor version digits as well
  while lexer.buf[lexer.bufpos] in digits:
    o.add(lexer.buf[lexer.bufpos])
    lexer.bufpos.inc()
  if lexer.buf[lexer.bufpos] notin spaceOrLineEnd:
    raiseError("Invalid YAML version number")

template lineEnding(lexer: FastLexer) =
  debug("lex: lineEnding")
  if lexer.buf[lexer.bufpos] notin lineEnd:
    while lexer.buf[lexer.bufpos] in space:
      lexer.bufpos.inc()
    if lexer.buf[lexer.bufpos] in lineEnd:
      discard
    elif lexer.buf[lexer.bufpos] == '#':
      while lexer.buf[lexer.bufpos] notin lineEnd:
        lexer.bufpos.inc()
    else:
      raiseError("Unexpected token (expected comment or line end)",
                 lexer.bufpos)

template tagShorthand(lexer: FastLexer, shorthand: var string) =
  debug("lex: tagShorthand")
  while lexer.buf[lexer.bufpos] in space:
    lexer.bufpos.inc()
  if lexer.buf[lexer.bufpos] != '!':
    raiseError("Invalid tag shorthand")
  shorthand.add('!')
  lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
  lexer.bufpos.inc()
  var c = lexer.buf[lexer.bufpos]
  if c in spaceOrLineEnd:
    discard
  else:
    while c != '!':
      case c
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-':
        shorthand.add(c)
        lexer.bufpos.inc()
        c = lexer.buf[lexer.bufpos]
      else:
        raiseError("Illegal character in tag shorthand", lexer.bufpos)
    shorthand.add(c)
    lexer.bufpos.inc()
  if lexer.buf[lexer.bufpos] notin spaceOrLineEnd:
    raiseError("Missing space after tag shorthand", lexer.bufpos)

template tagUri(lexer: FastLexer, uri: var string) =
  debug("lex: tagUri")
  while lexer.buf[lexer.bufpos] in space:
    lexer.bufpos.inc()
  lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
  var c = lexer.buf[lexer.bufpos]
  while c notin spaceOrLineEnd:
    case c
    of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@', '&',
       '-', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
      uri.add(c)
      lexer.bufpos.inc()
      c = lexer.buf[lexer.bufpos]
    else:
      raiseError("Invalid tag uri")

template directivesEnd(lexer: FastLexer, content: var string,
                       token: var LexedPossibleDirectivesEnd) =
  debug("lex: directivesEnd")
  content.add('-')
  lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
  lexer.bufpos.inc()
  case lexer.buf[lexer.bufpos]
  of '-':
    content.add('-')
    lexer.bufpos.inc()
    if lexer.buf[lexer.bufpos] == '-':
      content.add('-')
      lexer.bufpos.inc()
      if lexer.buf[lexer.bufpos] in spaceOrLineEnd:
        token = lpdeDirectivesEnd
      else:
        token = lpdeScalarContent
    else:
      token = lpdeScalarContent
  of spaceOrLineEnd:
    token = lpdeSequenceItem
  else:
    token = lpdeScalarContent

template documentEnd(lexer: var FastLexer, content: var string,
                     isDocumentEnd: var bool) =
  content.add('.')
  lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
  lexer.bufpos.inc()
  if lexer.buf[lexer.bufpos] == '.':
    content.add('.')
    lexer.bufpos.inc()
    if lexer.buf[lexer.bufpos] == '.':
      content.add('.')
      lexer.bufpos.inc()
      if lexer.buf[lexer.bufpos] in spaceOrLineEnd:
        isDocumentEnd = true
      else:
        isDocumentEnd = false
    else:
      isDocumentEnd = false
  else:
    isDocumentEnd = false

template singleQuotedScalar(lexer: FastLexer, content: var string) =
  debug("lex: singleQuotedScalar")
  lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
  lexer.bufpos.inc()
  while true:
    case lexer.buf[lexer.bufpos]
    of '\'':
      lexer.bufpos.inc()
      if lexer.buf[lexer.bufpos] == '\'':
        content.add('\'')
        lexer.bufpos.inc()  # consume the second quote of the '' escape
      else:
        break
    of EndOfFile:
      raiseError("Unfinished single quoted string")
    else:
      content.add(lexer.buf[lexer.bufpos])
      lexer.bufpos.inc()

proc unicodeSequence(lexer: var FastLexer, length: int):
    string {.raises: [YamlParserError].} =
  debug("lex: unicodeSequence")
  var unicodeChar = 0.Rune
  let start = lexer.bufpos - 1
  for i in countup(0, length - 1):
    lexer.bufpos.inc()
    let
      digitPosition = length - i - 1
      c = lexer.buf[lexer.bufpos]
    case c
    of EndOfFile:
      raiseError("Unfinished unicode escape sequence", start)
    of '0' .. '9':
      unicodeChar = unicodeChar or
          (cast[int](c) - 0x30) shl (digitPosition * 4)
    of 'A' .. 'F':
      unicodeChar = unicodeChar or
          (cast[int](c) - 0x37) shl (digitPosition * 4)
    of 'a' .. 'f':
      unicodeChar = unicodeChar or
          (cast[int](c) - 0x57) shl (digitPosition * 4)
    else:
      raiseError("Invalid character in unicode escape sequence", lexer.bufpos)
  return toUTF8(unicodeChar)

template doublyQuotedScalar(lexer: FastLexer, content: var string) =
  debug("lex: doublyQuotedScalar")
  lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
  while true:
    lexer.bufpos.inc()
    let c = lexer.buf[lexer.bufpos]
    case c
    of EndOfFile:
      raiseError("Unfinished doubly quoted string")
    of '\\':
      lexer.bufpos.inc()
      case lexer.buf[lexer.bufpos]
      of EndOfFile:
        raiseError("Unfinished escape sequence")
      of '0': content.add('\0')
      of 'a': content.add('\x07')
      of 'b': content.add('\x08')
      of '\t', 't': content.add('\t')
      of 'n': content.add('\x0A')
      of 'v': content.add('\v')
      of 'f': content.add('\f')
      of 'r': content.add('\r')
      of 'e': content.add('\e')
      of ' ': content.add(' ')
      of '"': content.add('"')
      of '/': content.add('/')
      of '\\': content.add('\\')
      of 'N': content.add(UTF8NextLine)
      of '_': content.add(UTF8NonBreakingSpace)
      of 'L': content.add(UTF8LineSeparator)
      of 'P': content.add(UTF8ParagraphSeparator)
      of 'x': content.add(lexer.unicodeSequence(2))
      of 'u': content.add(lexer.unicodeSequence(4))
      of 'U': content.add(lexer.unicodeSequence(8))
      else:
        raiseError("Illegal character in escape sequence")
    of '"':
      lexer.bufpos.inc()
      break
    else:
      content.add(c)

# True if the character at `index` may continue a plain scalar in the
# given context.
proc isPlainSafe(lexer: FastLexer, index: int, context: YamlContext): bool =
  case lexer.buf[index]
  of spaceOrLineEnd:
    result = false
  of flowIndicators:
    result = context in [cFlowOut, cBlockKey]
  else:
    result = true

template plainScalar(lexer: FastLexer, content: var string,
                     context: YamlContext) =
  debug("lex: plainScalar")
  lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
  content.add(lexer.buf[lexer.bufpos])
  block outer:
    while true:
      lexer.bufpos.inc()
      let c = lexer.buf[lexer.bufpos]
      case c
      of lineEnd:
        break
      of ' ', '\t':
        var after = "" & c
        while true:
          lexer.bufpos.inc()
          let c2 = lexer.buf[lexer.bufpos]
          case c2
          of ' ', '\t':
            after.add(c2)
          of lineEnd:
            break outer
          of ':':
            if lexer.isPlainSafe(lexer.bufpos + 1, context):
              content.add(after & ':')
              break
            else:
              break outer
          of '#':
            break outer
          else:
            content.add(after)
            content.add(c2)
            break
      of flowIndicators:
        if context in [cFlowOut, cBlockKey]:
          content.add(c)
        else:
          break
      of ':':
        if lexer.isPlainSafe(lexer.bufpos + 1, context):
          content.add(':')
        else:
          break outer
      of '#':
        break outer
      else:
        content.add(c)

template continueMultilineScalar() {.dirty.} =
  cachedScalar.scalarContent.add(
      if newlines == 1: " " else: repeat('\x0A', newlines - 1))
  lexer.plainScalar(cachedScalar.scalarContent, cBlockOut)
  state = fpBlockAfterPlainScalar

template tagHandle(lexer: var FastLexer, content: var string,
                   shorthandEnd: var int) =
  debug("lex: tagHandle")
  shorthandEnd = 0
  lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
  content.add(lexer.buf[lexer.bufpos])
  var i = 0
  while true:
    lexer.bufpos.inc()
    i.inc()
    let c = lexer.buf[lexer.bufpos]
    case c
    of spaceOrLineEnd:
      if shorthandEnd == -1:
        raiseError("Unclosed verbatim tag")
      break
    of '!':
      if shorthandEnd == -1 and i == 2:
        content.add(c)
      elif shorthandEnd != 0:
        raiseError("Illegal character in tag suffix", lexer.bufpos)
      else:
        shorthandEnd = i
        content.add(c)
    of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@', '&',
       '-', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
      content.add(c)
    of '<':
      if i == 1:
        shorthandEnd = -1
        content = ""
      else:
        raiseError("Illegal character in tag handle", lexer.bufpos)
    of '>':
      if shorthandEnd == -1:
        lexer.bufpos.inc()
        if lexer.buf[lexer.bufpos] notin spaceOrLineEnd:
          raiseError("Missing space after verbatim tag handle", lexer.bufpos)
        break
      else:
        raiseError("Illegal character in tag handle", lexer.bufpos)
    else:
      raiseError("Illegal character in tag handle", lexer.bufpos)

template anchorName(lexer: FastLexer, content: var string) =
  debug("lex: anchorName")
  lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
  while true:
    lexer.bufpos.inc()
    let c = lexer.buf[lexer.bufpos]
    case c
    of spaceOrLineEnd:
      break
    of '[', ']', '{', '}', ',':
      raiseError("Illegal character in anchor", lexer.bufpos)
    else:
      content.add(c)

proc fastparse*(tagLib: TagLibrary, s: Stream): YamlStream =
  result = iterator(): YamlStreamEvent =
    var
      lexer: FastLexer
      state = fpInitial
      shorthands: Table[string, string]
      anchors: Table[string, AnchorId]
      nextAnchorId: AnchorId
      content: string
      tag, objectTag: TagId = yTagQuestionMark
      anchor, objectAnchor: AnchorId = yAnchorNone
      ancestry = newSeq[FastParseLevel]()
      level: FastParseLevel
      cachedScalar: YamlStreamEvent
      indentation: int
      newlines: int

    lexer.open(s)
    initDocValues()

    # The parser is a state machine over FastParseState; ancestry holds the
    # stack of currently open block levels.
    while true:
      case state
      of fpInitial:
        debug("state: initial")
        case lexer.buf[lexer.bufpos]
        of '%':
          var ld: LexedDirective
          lexer.directiveName(ld)
          case ld
          of ldYaml:
            var version = ""
            lexer.yamlVersion(version)
            if version != "1.2":
              echo "version is not 1.2!"
              # TODO: warning (unknown version)
            lexer.lineEnding()
            handleLineEnd(false)
          of ldTag:
            var shorthand, uri = ""
            lexer.tagShorthand(shorthand)
            lexer.tagUri(uri)
            shorthands.add(shorthand, uri)
            lexer.lineEnding()
            handleLineEnd(false)
          of ldUnknown:
            # TODO: warning (unknown directive)
            lexer.finishLine()
            handleLineEnd(false)
        of ' ', '\t':
          lexer.bufpos.inc()
        of '\x0A':
          lexer.bufpos = lexer.handleLF(lexer.bufpos)
        of '\c':
          lexer.bufpos = lexer.handleCR(lexer.bufpos)
        of EndOfFile:
          return
        of '#':
          lexer.lineEnding()
          handleLineEnd(false)
        of '-':
          var token: LexedPossibleDirectivesEnd
          content = ""
          lexer.directivesEnd(content, token)
          yield startDocEvent()
          case token
          of lpdeDirectivesEnd:
            state = fpBlockObjectStart
          of lpdeSequenceItem:
            indentation = 0
            handleStartBlockSequence()
            state = fpBlockObjectStart
          of lpdeScalarContent:
            lexer.plainScalar(content, cBlockOut)
            cachedScalar = scalarEvent(content, tag, anchor)
            state = fpBlockAfterPlainScalar
        else:
          yield startDocEvent()
          state = fpBlockLineStart
      of fpBlockLineStart:
        debug("state: blockLineStart")
        case lexer.buf[lexer.bufpos]
        of '-':
          var token: LexedPossibleDirectivesEnd
          content = ""
          lexer.directivesEnd(content, token)
          case token
          of lpdeDirectivesEnd:
            closeEverything()
            initDocValues()
            yield startDocEvent()
            state = fpBlockObjectStart
          of lpdeSequenceItem:
            indentation = 0
            closeMoreIndentedLevels()
            handleStartBlockSequence()
            state = fpBlockObjectStart
          of lpdeScalarContent:
            if level.kind == fplScalar:
              continueMultilineScalar()
            else:
              lexer.plainScalar(content, cBlockOut)
              cachedScalar = scalarEvent(content, tag, anchor)
              state = fpBlockAfterPlainScalar
        of '.':
          var isDocumentEnd: bool
          content = ""
          lexer.documentEnd(content, isDocumentEnd)
          if isDocumentEnd:
            lexer.lineEnding()
            closeEverything()
            initDocValues()
            state = fpInitial
          elif level.kind == fplScalar:
            continueMultilineScalar()
          else:
            lexer.plainScalar(content, cBlockOut)
            cachedScalar = scalarEvent(content, tag, anchor)
            state = fpBlockAfterPlainScalar
        of ' ':
          lexer.skipIndentation()
          indentation = lexer.getColNumber(lexer.bufpos)
          closeMoreIndentedLevels()
          case level.kind
          of fplScalar:
            state = fpBlockContinueScalar
          of fplUnknown:
            state = fpBlockObjectStart
            level.indentation = indentation
          else:
            state = fpBlockObjectStart
        else:
          indentation = 0
          closeMoreIndentedLevels()
          case level.kind
          of fplScalar:
            state = fpBlockContinueScalar
          of fplUnknown:
            state = fpBlockObjectStart
            level.indentation = indentation
          else:
            state = fpBlockObjectStart
      of fpBlockContinueScalar:
        debug("state: blockContinueScalar")
        lexer.skipWhitespace()
        case lexer.buf[lexer.bufpos]
        of '\x0A':
          newlines.inc()
          lexer.bufpos = lexer.handleLF(lexer.bufpos)
          state = fpBlockLineStart
        of '\c':
          newlines.inc()
          lexer.bufpos = lexer.handleCR(lexer.bufpos)
          state = fpBlockLineStart
        of ':':
          if lexer.isPlainSafe(lexer.bufpos + 1, cBlockOut):
            continueMultilineScalar()
          else:
            raiseError("Unexpected token", lexer.bufpos)
        of '#':
          yield cachedScalar
          lexer.lineEnding()
          handleLineEnd(true)
          if ancestry.len == 0:
            state = fpExpectDocEnd
          else:
            level = ancestry.pop()
            handleObjectEnd()
            state = fpBlockLineStart
        else:
          continueMultilineScalar()
      of fpBlockAfterPlainScalar:
        debug("state: blockAfterPlainScalar")
        lexer.skipWhitespace()
        case lexer.buf[lexer.bufpos]
        of '\x0A':
          if level.kind notin [fplUnknown, fplScalar]:
            raiseError("Unexpected scalar")
          newlines = 1
          level.kind = fplScalar
          lexer.bufpos = lexer.handleLF(lexer.bufpos)
          state = fpBlockLineStart
        of '\c':
          if level.kind notin [fplUnknown, fplScalar]:
            raiseError("Unexpected scalar")
          newlines = 1
          level.kind = fplScalar
          lexer.bufpos = lexer.handleCR(lexer.bufpos)
          state = fpBlockLineStart
        else:
          state = fpBlockAfterScalar
      of fpBlockAfterScalar:
        debug("state: blockAfterScalar")
        lexer.skipWhitespace()
        case lexer.buf[lexer.bufpos]
        of EndOfFile:
          level.kind = fplScalar
          closeEverything()
          break
        of '\x0A':
          if level.kind != fplUnknown:
            raiseError("Unexpected scalar")
          applyObjectProperties()
          yield cachedScalar
          if ancestry.len == 0:
            state = fpExpectDocEnd
          else:
            level = ancestry.pop()
            handleObjectEnd()
            state = fpBlockLineStart
          lexer.bufpos = lexer.handleLF(lexer.bufpos)
        of '\c':
          if level.kind != fplUnknown:
            raiseError("Unexpected scalar")
          applyObjectProperties()
          yield cachedScalar
          if ancestry.len == 0:
            state = fpExpectDocEnd
          else:
            level = ancestry.pop()
            handleObjectEnd()
            state = fpBlockLineStart
          lexer.bufpos = lexer.handleCR(lexer.bufpos)
        of ':':
          case level.kind
          of fplUnknown:
            level.kind = fplMapKey
            handleStartObject(yamlStartMap)
          of fplMapValue:
            yield scalarEvent("", yTagQuestionMark, yAnchorNone)
            level.kind = fplMapKey
          of fplMapKey:
            if level.indentation != indentation:
              raiseError("Invalid indentation for map key")
          of fplSequence:
            raiseError("Illegal token (expected sequence item)")
          of fplScalar:
            raiseError("Multiline scalars may not be implicit map keys")
          handleObjectEnd()
          yield cachedScalar
          ancestry.add(level)
          lexer.bufpos.inc()
          lexer.skipWhitespace()
          indentation = lexer.getColNumber(lexer.bufpos)
          level = FastParseLevel(kind: fplUnknown, indentation: indentation)
          state = fpBlockObjectStart
        of '#':
          applyObjectProperties()
          yield cachedScalar
          lexer.lineEnding()
          handleLineEnd(true)
          state = fpBlockLineStart
        else:
          raiseError("Illegal token (expected ':', comment or line end)",
                     lexer.bufpos)
      of fpBlockObjectStart:
        debug("state: blockObjectStart")
        lexer.skipWhitespace()
        let objectStart = lexer.getColNumber(lexer.bufpos)
        case lexer.buf[lexer.bufpos]
        of '\x0A':
          propsToObjectProps()
          lexer.bufpos = lexer.handleLF(lexer.bufpos)
          state = fpBlockLineStart
        of '\c':
          propsToObjectProps()
          lexer.bufpos = lexer.handleCR(lexer.bufpos)
          state = fpBlockLineStart
        of EndOfFile:
          closeEverything()
          return
        of '#':
          lexer.lineEnding()
          handleLineEnd(true)
        of '\'':
          handleStartBlockScalar()
          content = ""
          lexer.singleQuotedScalar(content)
          if tag == yTagQuestionMark:
            tag = yTagExclamationMark
          cachedScalar = scalarEvent(content, tag, anchor)
          state = fpBlockAfterScalar
        of '"':
          handleStartBlockScalar()
          content = ""
          lexer.doublyQuotedScalar(content)
          if tag == yTagQuestionMark:
            tag = yTagExclamationMark
          cachedScalar = scalarEvent(content, tag, anchor)
          state = fpBlockAfterScalar
        of '-':
          if lexer.isPlainSafe(lexer.bufpos + 1, cBlockOut):
            handleStartBlockScalar()
            lexer.tokenstart = lexer.getColNumber(lexer.bufpos)
            content = ""
            lexer.plainScalar(content, cBlockOut)
            cachedScalar = scalarEvent(content, tag, anchor)
            state = fpBlockAfterPlainScalar
          else:
            lexer.bufpos.inc()
            handleStartBlockSequence()
        of '!':
          if tag != yTagQuestionMark:
            raiseError("Only one tag handle is allowed per node")
          content = ""
          var
            shorthandEnd: int
            tagUri: string
          lexer.tagHandle(content, shorthandEnd)
          if shorthandEnd != -1:
            try:
              let prefix = shorthands[content[0..shorthandEnd]]
              tagUri = prefix & content[shorthandEnd + 1 .. ^1]
            except KeyError:
              raiseError("Undefined tag shorthand: " & content[0..shorthandEnd])
          else:
            shallowCopy(tagUri, content)
          try:
            tag = tagLib.tags[tagUri]
          except KeyError:
            tag = tagLib.registerUri(tagUri)
        of '&':
          if anchor != yAnchorNone:
            raiseError("Only one anchor is allowed per node", lexer.bufpos)
          content = ""
          lexer.anchorName(content)
          anchor = nextAnchorId
          anchors[content] = anchor
          nextAnchorId = cast[AnchorId](cast[int](nextAnchorId) + 1)
        of '*':
          if anchor != yAnchorNone or tag != yTagQuestionMark:
            raiseError("Alias may not have anchor or tag")
          content = ""
          lexer.anchorName(content)
          try:
            cachedScalar = aliasEvent(anchors[content])
          except KeyError:
            raiseError("Unknown anchor")
          state = fpBlockAfterScalar
        else:
          handleStartBlockScalar()
          content = ""
          lexer.plainScalar(content, cBlockOut)
          cachedScalar = scalarEvent(content, tag, anchor)
          state = fpBlockAfterPlainScalar
      of fpExpectDocEnd:
        discard # TODO
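To make the state machine above concrete: for a small block mapping such as "key: value", the iterator is intended to produce a stream along these lines. This is a sketch assembled from this commit's event constructors, not verified output; the defaulted tags and anchors are those of scalarEvent.

import yaml

# Illustrative only: the event sequence fastparse should emit for "key: value".
let expected = @[
  startDocEvent(),
  startMapEvent(yTagQuestionMark, yAnchorNone),
  scalarEvent("key"),
  scalarEvent("value"),
  endMapEvent(),
  endDocEvent()
]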
@@ -153,8 +153,7 @@ template yieldToken(kind: YamlLexerToken) {.dirty.} =
 
 template yieldScalarPart() {.dirty.} =
   when defined(yamlDebug):
-    echo "Lexer token: tScalarPart(\"", my.content, "\".", typeHintState,
-         ")"
+    echo "Lexer token: tScalarPart(\"", my.content, "\")"
   yield tScalarPart
   my.content = ""
@@ -5,11 +5,12 @@
 # distribution, for details about the copyright.
 
 proc initTagLibrary*(): TagLibrary =
+  new(result)
   result.tags = initTable[string, TagId]()
   result.nextCustomTagId = yFirstCustomTagId
   result.secondaryPrefix = yamlTagRepositoryPrefix
 
-proc registerUri*(tagLib: var TagLibrary, uri: string): TagId =
+proc registerUri*(tagLib: TagLibrary, uri: string): TagId =
   tagLib.tags[uri] = tagLib.nextCustomTagId
   result = tagLib.nextCustomTagId
   tagLib.nextCustomTagId = cast[TagId](cast[int](tagLib.nextCustomTagId) + 1)
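The ref object change is what allows registerUri to drop its var parameter: the fields behind a ref can be mutated through a plain parameter, so the closure iterator in fastparse can register unknown tags on the tagLib it captured. A minimal sketch of the principle, using a hypothetical Registry type that is not from this commit:

type Registry = ref object
  nextId: int

proc register(r: Registry): int =
  # r is not a var parameter, yet the object it references is mutable.
  result = r.nextId
  r.nextId.inc()

var reg = Registry(nextId: 0)
discard reg.register()
assert reg.nextId == 1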
yaml.nim:
@@ -20,6 +20,9 @@
 import streams, unicode, lexbase, tables, strutils, json, hashes, queues, macros
 export streams, tables, json
 
+when defined(yamlDebug):
+  import terminal
+
 type
   TypeHint* = enum
     ## A type hint is a friendly message from the YAML lexer, telling you

@@ -55,9 +58,9 @@ type
   TagId* = distinct int ## \
     ## A ``TagId`` identifies a tag URI, like for example
     ## ``"tag:yaml.org,2002:str"``. The URI corresponding to a ``TagId`` can
-    ## be queried from the `YamlTagLibrary <#YamlTagLibrary>`_ which was
+    ## be queried from the `TagLibrary <#TagLibrary>`_ which was
     ## used to create this ``TagId`` with
-    ## `uri <#uri,YamlTagLibrary,TagId>`_. URI strings are
+    ## `uri <#uri,TagLibrary,TagId>`_. URI strings are
     ## mapped to ``TagId`` s for efficiency reasons (you do not need to
     ## compare strings every time) and to be able to discover unknown tag
     ## URIs early in the parsing process.

@@ -68,7 +71,7 @@ type
     ## (for example, because the parser yielded a ``yamlEndDocument``
     ## event). ``AnchorId`` s exists because of efficiency, much like
     ## ``TagId`` s. The actual anchor name can be queried with
-    ## `anchor <#anchor,YamlSequentialParser,AnchorId>`_.
+    ## `anchor <#anchor,YamlParser,AnchorId>`_.
 
   YamlStreamEvent* = object
     ## An element from a `YamlStream <#YamlStream>`_. Events that start an

@@ -80,7 +83,7 @@ type
     ## the non-specific tags ``?`` or ``!`` according to the YAML
     ## specification. These are by convention mapped to the ``TagId`` s
     ## ``yTagQuestionMark`` and ``yTagExclamationMark`` respectively.
-    ## Mapping is done by a `YamlTagLibrary <#YamlTagLibrary>`_.
+    ## Mapping is done by a `TagLibrary <#TagLibrary>`_.
     case kind*: YamlStreamEventKind
     of yamlStartMap:
       mapAnchor* : AnchorId

@@ -110,7 +113,7 @@ type
     ## always yield a well-formed ``YamlStream`` and expect it to be
     ## well-formed if it's an input.
 
-  TagLibrary* = object
+  TagLibrary* = ref object
     ## A ``YamlTagLibrary`` maps tag URIs to ``TagId`` s. YAML tag URIs
     ## that are defined in the YAML specification or in the
     ## `YAML tag repository <http://yaml.org/type/>`_ should be mapped to

@@ -121,8 +124,8 @@ type
     ## `coreTagLibrary <#coreTagLibrary>`_, and
     ## `extendedTagLibrary <#extendedTagLibrary>`_.
     ##
-    ## If the ``YamlSequentialParser`` encounters a tag which is not part of
-    ## the ``YamlTagLibrary``, it will create a new ``TagId`` equal to
+    ## If the ``YamlParser`` encounters a tag which is not part of
+    ## the ``TagLibrary``, it will create a new ``TagId`` equal to
     ## ``nextCustomTagId`` and increase that variable. It will be
     ## initialized to `yFirstCustomTagId <#yFirstCustomTagId>`_. If you do
     ## not want to allow unknown tag URIs to be processed, just abort

@@ -309,6 +312,7 @@ proc endSeqEvent*(): YamlStreamEvent {.inline, raises: [].}
 proc scalarEvent*(content: string = "", tag: TagId = yTagQuestionMark,
                   anchor: AnchorId = yAnchorNone):
     YamlStreamEvent {.inline, raises: [].}
+proc aliasEvent*(anchor: AnchorId): YamlStreamEvent {.inline, raises: [].}
 
 proc `==`*(left, right: TagId): bool {.borrow.}
 proc `$`*(id: TagId): string

@@ -322,7 +326,7 @@ proc initTagLibrary*(): TagLibrary
   ## initializes the ``tags`` table and sets ``nextCustomTagId`` to
   ## ``yFirstCustomTagId``.
 
-proc registerUri*(tagLib: var TagLibrary, uri: string): TagId
+proc registerUri*(tagLib: TagLibrary, uri: string): TagId
   ## registers a custom tag URI with a ``YamlTagLibrary``. The URI will get
   ## the ``TagId`` ``nextCustomTagId``, which will be incremented.

@@ -375,6 +379,9 @@ proc parse*(parser: YamlParser, s: Stream):
     YamlStream {.raises: [IOError, YamlParserError].}
   ## Parse a YAML character stream. ``s`` must be readable.
 
+proc fastparse*(tagLib: TagLibrary, s: Stream):
+    YamlStream {.raises: [IOError, YamlParserError].}
+
 proc constructJson*(s: YamlStream): seq[JsonNode]
   ## Construct an in-memory JSON tree from a YAML event stream. The stream may
   ## not contain any tags apart from those in ``coreTagLibrary``. Anchors and

@@ -414,4 +421,5 @@ include private.events
 include private.parser
 include private.json
 include private.presenter
-include private.hints
+include private.hints
+include private.fastparse