NimYAML/private/lexer.nim

1100 lines
38 KiB
Nim
Raw Normal View History

# NimYAML - YAML implementation in Nim
# (c) Copyright 2015 Felix Krause
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
2015-11-27 22:42:11 +00:00
type
  # Character encodings the lexer tells apart when it inspects the first
  # bytes of the input (see ``detect_encoding``).
  Encoding = enum
    Unsupported, ## Unsupported encoding
    UTF8,        ## UTF-8
    UTF16LE,     ## UTF-16 Little Endian
    UTF16BE,     ## UTF-16 Big Endian
    UTF32LE,     ## UTF-32 Little Endian
    UTF32BE      ## UTF-32 Big Endian

  # Kinds of tokens yielded by the ``tokens`` iterator. The text belonging
  # to a token is stored in ``YamlLexer.content``.
  YamlLexerToken = enum
    # separating tokens
    tDirectivesEnd, tDocumentEnd, tStreamEnd,
    # tokens only in directives
    tTagDirective, tYamlDirective, tUnknownDirective,
    tVersionPart, tTagURI,
    tUnknownDirectiveParam,
    # tokens in directives and content
    tTagHandle, tComment,
    # from here on tokens only in content
    tLineStart,
    # control characters
    tColon, tDash, tQuestionmark, tComma, tOpeningBrace,
    tOpeningBracket, tClosingBrace, tClosingBracket, tPipe, tGreater,
    # block scalar header
    tBlockIndentationIndicator, tPlus,
    # scalar content
    tScalar, tScalarPart,
    # tags
    tVerbatimTag, tTagSuffix,
    # anchoring
    tAnchor, tAlias,
    # error reporting
    tError

  # States of the character-level state machine inside ``tokens``.
  YamlLexerState = enum
    # initial states (not started reading any token)
    ylInitial, ylInitialSpaces, ylInitialUnknown, ylInitialContent,
    ylDefineTagHandleInitial, ylDefineTagURIInitial, ylInitialInLine,
    ylLineEnd, ylDirectiveLineEnd,
    # directive reading states
    ylDirective, ylDefineTagHandle, ylDefineTagURI, ylMajorVersion,
    ylMinorVersion, ylUnknownDirectiveParam, ylDirectiveComment,
    # scalar reading states
    ylPlainScalar, ylPlainScalarNone, ylSingleQuotedScalar,
    ylDoublyQuotedScalar, ylEscape, ylBlockScalar, ylBlockScalarHeader,
    ylSpaceAfterPlainScalar, ylSpaceAfterQuotedScalar,
    # indentation
    ylIndentation,
    # comments
    ylComment,
    # tags
    ylTagHandle, ylTagSuffix, ylVerbatimTag,
    # document separation
    ylDashes, ylDots,
    # anchoring
    ylAnchor, ylAlias

  # States of the automaton that classifies a plain scalar while it is
  # being read (see ``advanceTypeHint``). A state name spells the prefix
  # read so far; the ``Lower`` variants record that the letters following
  # the first one were lower case. ``ythNone`` means that no hint applies.
  YamlTypeHintState = enum
    ythInitial,
    ythF, ythFA, ythFAL, ythFALS, ythFALSE,
    ythN, ythNU, ythNUL, ythNULL,
    ythNO,
    ythO, ythON,
    ythOF, ythOFF,
    ythT, ythTR, ythTRU, ythTRUE,
    ythY, ythYE, ythYES,
    ythPoint, ythPointI, ythPointIN, ythPointINF,
    ythPointN, ythPointNA, ythPointNAN,
    ythLowerFA, ythLowerFAL, ythLowerFALS,
    ythLowerNU, ythLowerNUL,
    ythLowerOF,
    ythLowerTR, ythLowerTRU,
    ythLowerYE,
    ythPointLowerIN, ythPointLowerN, ythPointLowerNA,
    ythMinus, yth0, ythInt, ythDecimal, ythNumE, ythNumEPlusMinus,
    ythExponent, ythNone

  # Lexer state that is visible outside of the ``tokens`` iterator.
  YamlLexer = object of BaseLexer
    indentations: seq[int]  # set to an empty seq by ``open``
    encoding: Encoding      # encoding detected by ``detect_encoding``
    charlen: int            # bytes per character unit: 1, 2 or 4
    charoffset: int         # index of the inspected byte within a unit
                            # (0 for UTF-8 and little endian encodings,
                            # 1 for UTF-16BE, 3 for UTF-32BE)
    content*: string        # content of the last returned token.
    line*, column*: int     # position reported for the current token;
                            # both start at 1
    typeHint*: YamlTypeHint # hint set together with each tScalarPart
2015-11-27 22:42:11 +00:00
const
  # UTF-8 byte sequences appended to a double quoted scalar for the
  # escapes \N, \_, \L and \P respectively.
  UTF8NextLine = Rune(0x85).toUTF8
  UTF8NonBreakingSpace = Rune(0xA0).toUTF8
  UTF8LineSeparator = Rune(0x2028).toUTF8
  UTF8ParagraphSeparator = Rune(0x2029).toUTF8
proc detect_encoding(my: var YamlLexer) =
  ## Determines the character encoding of the input from the first (up to
  ## four) bytes in the buffer, skips a byte order mark if there is one and
  ## sets ``my.encoding``, ``my.charlen`` and ``my.charoffset``.
  ##
  ## The decision table is the one of the YAML specification:
  ##
  ## ===============  =========
  ## leading bytes    encoding
  ## ===============  =========
  ## ``00 00 FE FF``  UTF-32BE (BOM)
  ## ``00 00 00 xx``  UTF-32BE
  ## ``FF FE 00 00``  UTF-32LE (BOM)
  ## ``xx 00 00 00``  UTF-32LE
  ## ``FE FF``        UTF-16BE (BOM)
  ## ``00 xx``        UTF-16BE
  ## ``FF FE``        UTF-16LE (BOM)
  ## ``xx 00``        UTF-16LE
  ## anything else    UTF-8
  ## ===============  =========

  # ``xx 00 00 00`` is BOM-less UTF-32LE, ``xx 00`` followed by anything
  # else is BOM-less UTF-16LE.
  # NOTE(review): for an input shorter than four bytes this looks at bytes
  # behind the end of the input, like the ``00 00 ..`` branches below do.
  template lowByteFirstEncoding(): Encoding =
    (if my.buf[2] == '\0' and my.buf[3] == '\0': UTF32LE else: UTF16LE)

  var numBomChars = 0
  my.encoding = Unsupported
  if my.bufpos == 3:
    # BaseLexer already skipped UTF-8 BOM
    my.encoding = UTF8
  else:
    case my.buf[0]
    of '\0':
      if my.buf[1] == '\0':
        if my.buf[2] == '\0':
          # three leading zero bytes: the significant byte comes last,
          # i.e. this is BOM-less UTF-32 Big Endian
          my.encoding = UTF32BE
        elif my.buf[2] == '\xFE' and my.buf[3] == '\xFF':
          my.encoding = UTF32BE
          numBomChars = 4
        else:
          # this is probably not a unicode character stream,
          # but we just use the next match in the table
          my.encoding = UTF16BE
      else:
        # this is how a BOM-less UTF16BE input should actually look like
        my.encoding = UTF16BE
    of '\xFF':
      case my.buf[1]
      of '\xFE':
        if my.buf[2] == '\0' and my.buf[3] == '\0':
          my.encoding = UTF32LE
          numBomChars = 4
        else:
          my.encoding = UTF16LE
          numBomChars = 2
      of '\0':
        my.encoding = lowByteFirstEncoding()
      else:
        my.encoding = UTF8
    of '\xFE':
      case my.buf[1]
      of '\xFF':
        my.encoding = UTF16BE
        numBomChars = 2
      of '\0':
        my.encoding = lowByteFirstEncoding()
      else:
        my.encoding = UTF8
    else:
      if my.buf[1] == '\0':
        my.encoding = lowByteFirstEncoding()
      else:
        my.encoding = UTF8
  inc(my.bufpos, numBomChars)
  # number of bytes that make up one character unit
  my.charlen = case my.encoding
    of UTF8, Unsupported: 1
    of UTF16LE, UTF16BE: 2
    of UTF32LE, UTF32BE: 4
  # position of the least significant byte inside a unit; this is the only
  # byte the lexer looks at
  my.charoffset = case my.encoding
    of UTF8, Unsupported, UTF16LE, UTF32LE: 0
    of UTF16BE: 1
    of UTF32BE: 3
2015-12-23 11:35:07 +00:00
proc open(my: var YamlLexer, input: Stream) =
  ## Attaches the lexer to ``input`` and resets its state: the encoding is
  ## detected from the start of the stream, the token content is emptied
  ## and the reported position is set to line 1, column 1.
  lexbase.open(my, input)
  # needs the buffer filled by lexbase.open
  my.detect_encoding()
  my.indentations = @[]
  my.line = 1
  my.column = 1
  my.content = ""
2015-11-27 22:42:11 +00:00
2015-12-11 21:55:21 +00:00
template yieldToken(kind: YamlLexerToken) {.dirty.} =
  ## Yields ``kind`` from the surrounding iterator and empties
  ## ``my.content`` once the iterator is resumed. When compiled with
  ## ``-d:yamlDebug`` the token is echoed first; scalars are echoed together
  ## with their content.
  when defined(yamlDebug):
    if kind != tScalar:
      echo "Lexer token: ", kind
    else:
      echo "Lexer token: tScalar(\"", my.content, "\")"
  yield kind
  my.content = ""
2015-11-27 22:42:11 +00:00
2015-12-23 09:28:58 +00:00
template yieldScalarPart() {.dirty.} =
  ## Translates the current ``typeHintState`` into ``my.typeHint``, yields
  ## ``tScalarPart`` from the surrounding iterator and empties
  ## ``my.content`` once the iterator is resumed. Only accepting states of
  ## the type hint automaton give a hint; every other state maps to
  ## ``yTypeUnknown``.
  my.typeHint =
    case typeHintState
    of ythNULL: yTypeNull
    of ythTRUE, ythON, ythYES, ythY: yTypeBoolTrue
    of ythFALSE, ythOFF, ythNO, ythN: yTypeBoolFalse
    of ythInt, yth0: yTypeInteger
    of ythDecimal, ythExponent: yTypeFloat
    of ythPointINF: yTypeFloatInf
    of ythPointNAN: yTypeFloatNaN
    else: yTypeUnknown
  when defined(yamlDebug):
    echo "Lexer token: tScalarPart(\"", my.content, "\".", typeHintState, ")"
  yield tScalarPart
  my.content = ""
2015-12-23 11:35:07 +00:00
template yieldLexerError(message: string) {.dirty.} =
  ## Reports a lexing problem: stores ``message`` in ``my.content``, sets
  ## ``my.column`` to the current position and yields ``tError`` from the
  ## surrounding iterator. The content is emptied once the iterator is
  ## resumed. With ``-d:yamlDebug`` the message is echoed as well.
  when defined(yamlDebug):
    echo "Lexer error: ", message
  my.column = curPos
  my.content = message
  yield tError
  my.content = ""
2015-11-27 22:42:11 +00:00
template handleCR() {.dirty.} =
  ## Consumes a carriage return at the current position. ``lexbase.handleCR``
  ## is given the index of the byte holding the CR; its result is converted
  ## back to the start of the following character unit. Increments the line
  ## counter and resets ``curPos`` to 1.
  my.bufpos = lexbase.handleCR(my, my.bufpos + my.charoffset) -
      my.charoffset + my.charlen - 1
  inc(my.line)
  curPos = 1
2015-11-27 22:42:11 +00:00
template handleLF() {.dirty.} =
  ## Consumes a line feed at the current position. ``lexbase.handleLF`` is
  ## given the index of the byte holding the LF; its result is converted
  ## back to the start of the following character unit. Increments the line
  ## counter and resets ``curPos`` to 1.
  my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) -
      my.charoffset + my.charlen - 1
  inc(my.line)
  curPos = 1
2015-11-27 22:42:11 +00:00
template `or`(r: Rune, i: int): Rune =
  ## Bitwise OR of the code point ``r`` with ``i``. Plain casts are used so
  ## that no range check happens on the way back to ``Rune``.
  cast[Rune](i or cast[int](r))
macro typeHintStateMachine(c: untyped, content: varargs[untyped]): untyped =
  ## Generates the transition code of the type hint automaton.
  ##
  ## ``c`` is the character expression to dispatch on. ``content`` holds
  ## ``of`` branches; each branch lists one or more characters and contains
  ## rules of the form ``oldState => newState`` or
  ## ``[stateA, stateB] => newState``. Writing ``nil`` as new state keeps
  ## the current state.
  ##
  ## The result is a ``case`` over ``c`` whose branches contain a ``case``
  ## over the variable ``typeHintState``. Whenever no character branch or no
  ## rule matches, ``typeHintState`` is set to ``ythNone`` and ``state`` to
  ## ``ylPlainScalarNone``. Both variables are referenced by name and must
  ## be visible where the macro is invoked.

  # Depending on the parser, the branches arrive either wrapped in a single
  # statement list or as one argument each; normalise to a list of
  # branches.
  var branches = content
  if content.kind != nnkStmtList and content.len == 1 and
      content[0].kind == nnkStmtList:
    branches = content[0]

  # ``else`` branch shared by the outer and every inner case statement
  let noMatch = newNimNode(nnkElse).add(newStmtList(
      newAssignment(newIdentNode("typeHintState"), newIdentNode("ythNone")),
      newAssignment(newIdentNode("state"),
                    newIdentNode("ylPlainScalarNone"))))

  result = newNimNode(nnkCaseStmt, content).add(copyNimNode(c))
  for branch in branches.children:
    assert branch.kind == nnkOfBranch
    var
      charBranch = newNimNode(nnkOfBranch, branch)
      i = 0
      stateBranches = newNimNode(nnkCaseStmt, branch).add(
          newIdentNode("typeHintState"))
    # every child in front of the statement list is a character label
    while branch[i].kind != nnkStmtList:
      charBranch.add(copyNimTree(branch[i]))
      inc(i)
    for rule in branch[i].children:
      assert rule.kind == nnkInfix
      assert(($rule[0]) == "=>")
      var stateBranch = newNimNode(nnkOfBranch, rule)
      # left hand side: one state or a bracketed list of states
      case rule[1].kind
      of nnkBracket:
        for item in rule[1].children:
          stateBranch.add(item)
      of nnkIdent:
        stateBranch.add(rule[1])
      else:
        assert false
      # right hand side: ``nil`` stays in the current state
      if rule[2].kind == nnkNilLit:
        stateBranch.add(newStmtList(newNimNode(nnkDiscardStmt).add(
            newEmptyNode())))
      else:
        stateBranch.add(newStmtList(newAssignment(
            newIdentNode("typeHintState"), copyNimTree(rule[2]))))
      stateBranches.add(stateBranch)
    stateBranches.add(copyNimTree(noMatch))
    charBranch.add(newStmtList(stateBranches))
    result.add(charBranch)
  result.add(copyNimTree(noMatch))
2015-12-23 09:28:58 +00:00
template advanceTypeHint(ch: char) {.dirty.} =
  ## Feeds ``ch`` into the type hint automaton: ``typeHintState`` advances
  ## according to the rules below. If no rule matches, the macro sets
  ## ``typeHintState`` to ``ythNone`` and ``state`` to
  ## ``ylPlainScalarNone``, i.e. the scalar gets no type hint.
  typeHintStateMachine ch:
  of '.':
    [yth0, ythInt] => ythDecimal
    [ythInitial, ythMinus] => ythPoint
  of '+': ythNumE => ythNumEPlusMinus
  of '-':
    ythInitial => ythMinus
    ythNumE => ythNumEPlusMinus
  of '0':
    [ythInitial, ythMinus] => yth0
    [ythNumE, ythNumEPlusMinus] => ythExponent
    # a zero inside a number (10, 1.05, 1e10) keeps the number state, the
    # same way the digits 1..9 do
    [ythInt, ythDecimal, ythExponent] => nil
  of '1'..'9':
    [ythInitial, ythMinus] => ythInt
    [ythNumE, ythNumEPlusMinus] => ythExponent
    [ythInt, ythDecimal, ythExponent] => nil
  of 'a':
    ythF => ythLowerFA
    ythPointN => ythPointNA
    ythPointLowerN => ythPointLowerNA
  of 'A':
    ythF => ythFA
    ythPointN => ythPointNA
  of 'e':
    [yth0, ythInt, ythDecimal] => ythNumE
    ythLowerFALS => ythFALSE
    ythLowerTRU => ythTRUE
    ythY => ythLowerYE
  of 'E':
    [yth0, ythInt, ythDecimal] => ythNumE
    ythFALS => ythFALSE
    ythTRU => ythTRUE
    ythY => ythYE
  of 'f':
    ythInitial => ythF
    ythO => ythLowerOF
    ythLowerOF => ythOFF
    ythPointLowerIN => ythPointINF
  of 'F':
    ythInitial => ythF
    ythO => ythOF
    ythOF => ythOFF
    ythPointIN => ythPointINF
  of 'i', 'I': ythPoint => ythPointI
  of 'l':
    ythLowerNU => ythLowerNUL
    ythLowerNUL => ythNULL
    ythLowerFA => ythLowerFAL
  of 'L':
    ythNU => ythNUL
    ythNUL => ythNULL
    ythFA => ythFAL
  of 'n':
    ythInitial => ythN
    ythO => ythON
    ythPoint => ythPointLowerN
    ythPointI => ythPointLowerIN
    ythPointLowerNA => ythPointNAN
  of 'N':
    ythInitial => ythN
    ythO => ythON
    ythPoint => ythPointN
    ythPointI => ythPointIN
    ythPointNA => ythPointNAN
  of 'o', 'O':
    ythInitial => ythO
    ythN => ythNO
  of 'r': ythT => ythLowerTR
  of 'R': ythT => ythTR
  of 's':
    ythLowerFAL => ythLowerFALS
    ythLowerYE => ythYES
  of 'S':
    ythFAL => ythFALS
    ythYE => ythYES
  of 't', 'T': ythInitial => ythT
  of 'u':
    ythN => ythLowerNU
    ythLowerTR => ythLowerTRU
  of 'U':
    ythN => ythNU
    ythTR => ythTRU
  of 'y', 'Y': ythInitial => ythY
2015-12-23 09:28:58 +00:00
2015-12-23 11:35:07 +00:00
iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
  ## Lexes the input ``my`` has been opened on and yields one token kind at
  ## a time. While a token is yielded, ``my.content`` holds its text (for
  ## ``tError``: the error message), ``my.line`` and ``my.column`` the
  ## reported position and, for ``tScalarPart``, ``my.typeHint`` the type
  ## hint of the scalar. The content is emptied as soon as the iterator is
  ## resumed, so it has to be read before asking for the next token. The
  ## iterator finishes after yielding ``tStreamEnd``.
  ##
  ## The lexer is a state machine working on one character per loop
  ## iteration. A branch that ends with ``continue`` looks at the same
  ## character again in its new state; all other branches consume the
  ## character at the end of the loop body.
  var
    # the following three values are used for parsing escaped unicode chars
    unicodeChar: Rune = cast[Rune](0)
    escapeLength = 0
    expectedEscapeLength = 0

    trailingSpace = ""
      # used to temporarily store whitespace after a plain scalar
    lastSpecialChar: char = '\0'
      # stores chars that behave differently dependent on the following
      # char. handling will be deferred to next loop iteration.
    flowDepth = 0
      # Lexer must know whether it parses block or flow style. Therefore,
      # it counts the number of open flow arrays / maps here
    state: YamlLexerState = ylInitial # lexer state
    typeHintState: YamlTypeHintState = ythInitial
      # for giving type hints of plain scalars
    lastIndentationLength = 0
      # after parsing the indentation of the line, this will hold the
      # indentation length of the current line. Needed for checking where
      # a block scalar ends.
    blockScalarIndentation = -1
      # when parsing a block scalar, this will be set to the indentation
      # of the line that starts the block scalar.
    curPos = 1
      # column of the character currently looked at

  while true:
    let c = my.buf[my.bufpos + my.charoffset]
    case state
    of ylInitial:
      # start of a line in front of the first document
      case c
      of '%':
        state = ylDirective
        continue
      of ' ', '\t':
        state = ylInitialSpaces
        continue
      of '#':
        state = ylDirectiveComment
      else:
        state = ylInitialContent
        continue
    of ylInitialSpaces:
      case c
      of ' ', '\t':
        my.content.add(c)
      of '#':
        my.content = ""
        state = ylDirectiveComment
      of EndOfFile, '\r', '\x0A':
        # the line was blank; its whitespace must not end up in the
        # content of the first token of the following line
        my.content = ""
        state = ylDirectiveLineEnd
        continue
      else:
        state = ylIndentation
        continue
    of ylInitialContent:
      case c
      of '-':
        my.column = curPos
        state = ylDashes
        continue
      of '.':
        yieldToken(tLineStart)
        my.column = curPos
        state = ylDots
        continue
      else:
        state = ylIndentation
        continue
    of ylDashes:
      case c
      of '-':
        my.content.add(c)
      of ' ', '\t', '\r', '\x0A', EndOfFile:
        case my.content.len
        of 3:
          yieldToken(tDirectivesEnd)
          state = ylInitialInLine
        of 1:
          # a single dash followed by whitespace; ylInitialInLine turns
          # the deferred char into tDash
          my.content = ""
          yieldToken(tLineStart)
          lastSpecialChar = '-'
          state = ylInitialInLine
        else:
          # yieldToken clears the content, so the dashes are saved
          let tmp = my.content
          my.content = ""
          yieldToken(tLineStart)
          my.content = tmp
          my.column = curPos
          state = ylPlainScalarNone
          typeHintState = ythNone
        continue
      else:
        let tmp = my.content
        my.content = ""
        yieldToken(tLineStart)
        my.content = tmp
        if my.content.len == 1:
          # a leading minus may start a number
          typeHintState = ythMinus
          state = ylPlainScalar
        else:
          typeHintState = ythNone
          state = ylPlainScalarNone
        continue
    of ylDots:
      case c
      of '.':
        my.content.add(c)
      of ' ', '\t', '\r', '\x0A', EndOfFile:
        case my.content.len
        of 3:
          yieldToken(tDocumentEnd)
          state = ylDirectiveLineEnd
        else:
          state = ylPlainScalarNone
          typeHintState = ythNone
        continue
      else:
        state = ylPlainScalarNone
        typeHintState = ythNone
        continue
    of ylDirectiveLineEnd:
      case c
      of '\r':
        handleCR()
        state = ylInitial
        continue
      of '\x0A':
        handleLF()
        state = ylInitial
        continue
      of EndOfFile:
        yieldToken(tStreamEnd)
        break
      of ' ', '\t':
        discard
      of '#':
        state = ylDirectiveComment
      else:
        yieldLexerError("Unexpected content at end of directive: " & c)
    of ylLineEnd:
      case c
      of '\r':
        handleCR()
      of '\x0A':
        handleLF()
      of EndOfFile:
        yieldToken(tStreamEnd)
        break
      else:
        yieldLexerError("Internal error: Unexpected char at line end: " & c)
      state = ylInitialContent
      continue
    of ylSingleQuotedScalar:
      if lastSpecialChar != '\0':
        # ' is the only special char
        case c
        of '\'':
          # two consecutive quotes stand for one quote character
          my.content.add(c)
          lastSpecialChar = '\0'
        of EndOfFile, '\r', '\x0A':
          yieldToken(tScalar)
          lastSpecialChar = '\0'
          state = ylLineEnd
          continue
        else:
          yieldToken(tScalar)
          lastSpecialChar = '\0'
          state = ylSpaceAfterQuotedScalar
          continue
      else:
        case c
        of '\'':
          lastSpecialChar = c
        of EndOfFile:
          yieldLexerError("Unterminated single quoted string")
          yieldToken(tStreamEnd)
          break
        else:
          my.content.add(c)
    of ylDoublyQuotedScalar:
      case c
      of '"':
        yieldToken(tScalar)
        state = ylSpaceAfterQuotedScalar
      of EndOfFile:
        yieldLexerError("Unterminated doubly quoted string")
        yieldToken(tStreamEnd)
        break
      of '\\':
        state = ylEscape
        escapeLength = 0
      of '\r':
        my.content.add("\x0A")
        handleCR()
        # handleCR already moved to the first char of the next line
        continue
      of '\x0A':
        my.content.add(c)
        handleLF()
        # handleLF already moved to the first char of the next line
        continue
      else:
        my.content.add(c)
    of ylEscape:
      if escapeLength == 0:
        # c is the char that selects the kind of escape
        expectedEscapeLength = 0
        case c
        of EndOfFile:
          yieldLexerError("Unterminated doubly quoted string")
          # do not advance past the end of the input
          state = ylLineEnd
          continue
        of '0': my.content.add('\0')
        of 'a': my.content.add('\x07')
        of 'b': my.content.add('\x08')
        of '\t', 't': my.content.add('\t')
        of 'n': my.content.add('\x0A')
        of 'v': my.content.add('\v')
        of 'f': my.content.add('\f')
        of 'r': my.content.add('\r')
        of 'e': my.content.add('\e')
        of ' ': my.content.add(' ')
        of '"': my.content.add('"')
        of '/': my.content.add('/')
        of '\\': my.content.add('\\')
        of 'N': my.content.add(UTF8NextLine)
        of '_': my.content.add(UTF8NonBreakingSpace)
        of 'L': my.content.add(UTF8LineSeparator)
        of 'P': my.content.add(UTF8ParagraphSeparator)
        # the expected length counts the selecting char plus the digits
        of 'x': unicodeChar = cast[Rune](0); expectedEscapeLength = 3
        of 'u': unicodeChar = cast[Rune](0); expectedEscapeLength = 5
        of 'U': unicodeChar = cast[Rune](0); expectedEscapeLength = 9
        else:
          yieldLexerError("Unsupported escape sequence: \\" & c)
        if expectedEscapeLength == 0: state = ylDoublyQuotedScalar
      else:
        # c is a hex digit; the first digit is the most significant one
        let digitPosition = expectedEscapeLength - escapeLength - 1
        case c
        of EndOfFile:
          yieldLexerError("Unterminated escape sequence")
          state = ylLineEnd
          continue
        of '0' .. '9':
          unicodeChar = unicodeChar or
              (cast[int](c) - 0x30) shl (digitPosition * 4)
        of 'A' .. 'F':
          unicodeChar = unicodeChar or
              (cast[int](c) - 0x37) shl (digitPosition * 4)
        of 'a' .. 'f':
          unicodeChar = unicodeChar or
              (cast[int](c) - 0x57) shl (digitPosition * 4)
        else:
          yieldLexerError("unsupported char in unicode escape sequence: " &
                          c)
          escapeLength = 0
          state = ylDoublyQuotedScalar
          continue
      inc(escapeLength)
      if escapeLength == expectedEscapeLength and escapeLength > 0:
        my.content.add(toUTF8(unicodeChar))
        state = ylDoublyQuotedScalar
    of ylSpaceAfterQuotedScalar:
      case c
      of ' ', '\t':
        trailingSpace.add(c)
      of '#':
        # a comment has to be separated from the scalar by whitespace
        if trailingSpace.len == 0:
          yieldLexerError("Missing space before comment start")
        state = ylComment
        trailingSpace = ""
      else:
        trailingSpace = ""
        state = ylInitialInLine
        continue
    of ylPlainScalar:
      # plain scalar that may still get a type hint
      case c
      of EndOfFile, '\r', '\x0A':
        yieldScalarPart()
        state = ylLineEnd
        continue
      of ':':
        lastSpecialChar = c
        state = ylSpaceAfterPlainScalar
      of ' ':
        state = ylSpaceAfterPlainScalar
        continue
      of ',':
        if flowDepth > 0:
          lastSpecialChar = c
          state = ylSpaceAfterPlainScalar
        else:
          my.content.add(c)
          state = ylPlainScalarNone
          typeHintState = ythNone
      of '[', ']', '{', '}':
        yieldScalarPart()
        state = ylInitialInLine
        continue
      else:
        advanceTypeHint(c)
        my.content.add(c)
    of ylPlainScalarNone:
      # plain scalar for which the type hint automaton already gave up
      case c
      of EndOfFile, '\r', '\x0A':
        yieldScalarPart()
        state = ylLineEnd
        continue
      of ':':
        lastSpecialChar = c
        state = ylSpaceAfterPlainScalar
      of ' ':
        state = ylSpaceAfterPlainScalar
        continue
      of ',':
        if flowDepth > 0:
          lastSpecialChar = c
          state = ylSpaceAfterPlainScalar
        else:
          my.content.add(c)
      of '[', ']', '{', '}':
        yieldScalarPart()
        state = ylInitialInLine
        continue
      else:
        my.content.add(c)
    of ylSpaceAfterPlainScalar:
      if lastSpecialChar != '\0':
        case c
        of ' ', '\t', EndOfFile, '\r', '\x0A':
          # the special char ends the scalar; it stays in lastSpecialChar
          # and is turned into a token by ylInitialInLine
          yieldScalarPart()
          # whitespace collected so far does not belong to any later scalar
          trailingSpace = ""
          state = ylInitialInLine
        else:
          # the special char was part of the scalar after all
          my.content.add(trailingSpace)
          my.content.add(lastSpecialChar)
          lastSpecialChar = '\0'
          trailingSpace = ""
          state = ylPlainScalarNone
          typeHintState = ythNone
        continue

      case c
      of EndOfFile, '\r', '\x0A':
        trailingSpace = ""
        yieldScalarPart()
        state = ylLineEnd
        continue
      of ' ', '\t':
        trailingSpace.add(c)
      of ',':
        if flowDepth > 0:
          lastSpecialChar = c
        else:
          my.content.add(trailingSpace)
          my.content.add(c)
          trailingSpace = ""
          state = ylPlainScalarNone
          typeHintState = ythNone
      of ':', '#':
        lastSpecialChar = c
      of '[', ']', '{', '}':
        yieldScalarPart()
        trailingSpace = ""
        state = ylInitialInLine
        continue
      else:
        my.content.add(trailingSpace)
        my.content.add(c)
        trailingSpace = ""
        state = ylPlainScalarNone
        typeHintState = ythNone
    of ylInitialInLine:
      if lastSpecialChar != '\0':
        # the previous char was deferred, so it is one column to the left
        my.column = curPos - 1
        case c
        of ' ', '\t', '\r', '\x0A', EndOfFile:
          case lastSpecialChar
          of '#':
            my.content = "#"
            state = ylComment
          of ':':
            yieldToken(tColon)
          of '?':
            yieldToken(tQuestionmark)
          of '-':
            yieldToken(tDash)
          of ',':
            yieldToken(tComma)
          of '!':
            # a lone "!": primary tag handle with an empty suffix
            my.content = "!"
            yieldToken(tTagHandle)
            my.content = ""
            yieldToken(tTagSuffix)
          else:
            yieldLexerError("Unexpected special char: \"" &
                            lastSpecialChar & "\"")
          lastSpecialChar = '\0'
        elif lastSpecialChar == '!':
          case c
          of '<':
            state = ylVerbatimTag
            lastSpecialChar = '\0'
            # skip the '<', the loop end is not reached because of continue
            my.bufpos += my.charlen
          else:
            state = ylTagHandle
            my.content = "!"
            lastSpecialChar = '\0'
            my.column = curPos - 1
        else:
          # the deferred char starts a plain scalar
          my.content.add(lastSpecialChar)
          my.column = curPos - 1
          state = ylPlainScalar
          typeHintState = ythInitial
          # has to come after the reset above, because it may change both
          # typeHintState and state
          advanceTypeHint(lastSpecialChar)
          lastSpecialChar = '\0'
        continue
      case c
      of '\r', '\x0A', EndOfFile:
        state = ylLineEnd
        continue
      of ',':
        if flowDepth > 0:
          yieldToken(tComma)
        else:
          my.content = "" & c
          my.column = curPos
          state = ylPlainScalar
          typeHintState = ythInitial
          advanceTypeHint(c)
      of '[':
        inc(flowDepth)
        yieldToken(tOpeningBracket)
      of '{':
        inc(flowDepth)
        yieldToken(tOpeningBrace)
      of ']':
        yieldToken(tClosingBracket)
        if flowDepth > 0:
          inc(flowDepth, -1)
      of '}':
        yieldToken(tClosingBrace)
        if flowDepth > 0:
          inc(flowDepth, -1)
      of '#':
        lastSpecialChar = '#'
      of '"':
        my.column = curPos
        state = ylDoublyQuotedScalar
      of '\'':
        my.column = curPos
        state = ylSingleQuotedScalar
      of '!':
        my.column = curPos
        lastSpecialChar = '!'
      of '&':
        my.column = curPos
        state = ylAnchor
      of '*':
        my.column = curPos
        state = ylAlias
      of ' ':
        discard
      of '-':
        if flowDepth == 0:
          lastSpecialChar = '-'
        else:
          my.content = "" & c
          my.column = curPos
          state = ylPlainScalar
          typeHintState = ythInitial
          advanceTypeHint(c)
      of '?', ':':
        my.column = curPos
        lastSpecialChar = c
      of '|':
        yieldToken(tPipe)
        state = ylBlockScalarHeader
      of '>':
        yieldToken(tGreater)
        state = ylBlockScalarHeader
      of '\t':
        discard
      else:
        my.content = "" & c
        my.column = curPos
        state = ylPlainScalar
        typeHintState = ythInitial
        advanceTypeHint(c)
    of ylComment, ylDirectiveComment:
      case c
      of EndOfFile, '\r', '\x0A':
        yieldToken(tComment)
        case state
        of ylComment:
          state = ylLineEnd
        of ylDirectiveComment:
          state = ylDirectiveLineEnd
        else:
          yieldLexerError("Should never happen")
        continue
      else:
        my.content.add(c)
    of ylIndentation:
      # the spaces of the indentation are collected in my.content
      case c
      of EndOfFile, '\r', '\x0A':
        lastIndentationLength = my.content.len
        yieldToken(tLineStart)
        state = ylLineEnd
        continue
      of ' ':
        my.content.add(' ')
      else:
        lastIndentationLength = my.content.len
        yieldToken(tLineStart)
        if blockScalarIndentation != -1:
          if lastIndentationLength <= blockScalarIndentation:
            # not indented deeper than the header line: block scalar ended
            blockScalarIndentation = -1
          else:
            state = ylBlockScalar
            continue
        state = ylInitialInLine
        continue
    of ylTagHandle:
      case c
      of '!':
        my.content.add(c)
        yieldToken(tTagHandle)
        state = ylTagSuffix
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-':
        my.content.add(c)
      of ' ', '\t', EndOfFile, '\r', '\x0A':
        # no second '!' was found: the handle is "!" and everything after
        # it is the suffix
        let suffix = my.content[1..^1]
        my.content = "!"
        yieldToken(tTagHandle)
        my.content = suffix
        yieldToken(tTagSuffix)
        state = ylInitialInLine
        continue
      else:
        yieldLexerError("Invalid character in tag handle: " & c)
        my.content = ""
        state = ylInitialInLine
    of ylTagSuffix:
      case c
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@',
         '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
        my.content.add(c)
      of ' ', '\t', EndOfFile, '\r', '\x0A':
        yieldToken(tTagSuffix)
        state = ylInitialInLine
        continue
      else:
        yieldLexerError("Invalid character in tag suffix: " & c)
        state = ylInitialInLine
    of ylVerbatimTag:
      case c
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@',
         '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
        my.content.add(c)
      of '>':
        yieldToken(tVerbatimTag)
        state = ylInitialInLine
      of EndOfFile, '\r', '\x0A':
        yieldLexerError("Unfinished verbatim tag")
        state = ylLineEnd
        continue
      else:
        yieldLexerError("Invalid character in tag URI: " & c)
        my.content = ""
        state = ylInitialInLine
    of ylDirective:
      case c
      of ' ', '\t', '\r', '\x0A', EndOfFile:
        if my.content == "%YAML":
          yieldToken(tYamlDirective)
          state = ylMajorVersion
        elif my.content == "%TAG":
          yieldToken(tTagDirective)
          state = ylDefineTagHandleInitial
        else:
          yieldToken(tUnknownDirective)
          state = ylInitialUnknown
        if c == EndOfFile:
          continue
      else:
        my.content.add(c)
    of ylInitialUnknown:
      case c
      of ' ', '\t':
        discard
      of '\r', '\x0A', EndOfFile:
        state = ylDirectiveLineEnd
        continue
      of '#':
        state = ylDirectiveComment
        continue
      else:
        state = ylUnknownDirectiveParam
        continue
    of ylUnknownDirectiveParam:
      case c
      of '\r', '\x0A', EndOfFile, ' ', '\t':
        yieldToken(tUnknownDirectiveParam)
        state = ylInitialUnknown
        continue
      else:
        my.content.add(c)
    of ylMajorVersion:
      case c
      of '0' .. '9':
        my.content.add(c)
      of '.':
        yieldToken(tVersionPart)
        state = ylMinorVersion
      of EndOfFile, '\r', '\x0A', ' ', '\t':
        yieldLexerError("Missing YAML minor version.")
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Invalid character in YAML version: " & c)
        state = ylInitialUnknown
    of ylMinorVersion:
      case c
      of '0' .. '9':
        my.content.add(c)
      of EndOfFile, '\r', '\x0A', ' ', '\t':
        yieldToken(tVersionPart)
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Invalid character in YAML version: " & c)
        state = ylInitialUnknown
    of ylDefineTagHandleInitial:
      case c
      of ' ', '\t':
        discard
      of EndOfFile, '\r', '\x0A':
        yieldLexerError("Unfinished %TAG directive")
        state = ylDirectiveLineEnd
        continue
      of '!':
        my.content.add(c)
        state = ylDefineTagHandle
      else:
        yieldLexerError("Unexpected character in %TAG directive: " & c)
        state = ylInitialInLine
    of ylDefineTagHandle:
      case c
      of '!':
        my.content.add(c)
        yieldToken(tTagHandle)
        state = ylDefineTagURIInitial
      of 'a' .. 'z', 'A' .. 'Z', '-':
        my.content.add(c)
      of EndOfFile, '\r', '\x0A':
        yieldLexerError("Unfinished %TAG directive")
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Unexpected char in %TAG directive: " & c)
        state = ylInitialInLine
    of ylDefineTagURIInitial:
      case c
      of '\t', ' ':
        # collected only to check that handle and URI are separated
        my.content.add(c)
      of '\x0A', '\r', EndOfFile:
        yieldLexerError("Unfinished %TAG directive")
        state = ylDirectiveLineEnd
        continue
      else:
        if my.content.len == 0:
          yieldLexerError("Missing whitespace in %TAG directive")
        my.content = ""
        state = ylDefineTagURI
        continue
    of ylDefineTagURI:
      case c
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@',
         '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
        my.content.add(c)
      of '\x0A', '\r', EndOfFile, ' ', '\t':
        yieldToken(tTagURI)
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Invalid URI character: " & c)
        state = ylInitialInLine
        continue
    of ylBlockScalarHeader:
      case c
      of '0' .. '9':
        my.content = "" & c
        yieldToken(tBlockIndentationIndicator)
      of '+':
        yieldToken(tPlus)
      of '-':
        yieldToken(tDash)
      of '\r', '\x0A', EndOfFile:
        # lines indented deeper than the header line belong to the scalar
        blockScalarIndentation = lastIndentationLength
        state = ylLineEnd
        continue
      else:
        yieldLexerError("Unexpected character in block scalar header: " & c)
    of ylBlockScalar:
      case c
      of EndOfFile, '\r', '\x0A':
        yieldScalarPart()
        state = ylLineEnd
        continue
      else:
        my.content.add(c)
    of ylAnchor:
      case c
      of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
        yieldToken(tAnchor)
        state = ylInitialInLine
        continue
      else:
        my.content.add(c)
    of ylAlias:
      if lastSpecialChar != '\0':
        case c
        of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
          # the deferred ':' ends the alias; ylInitialInLine handles it
          yieldToken(tAlias)
          state = ylInitialInLine
          continue
        else:
          # the deferred ':' is part of the alias name
          my.content.add(lastSpecialChar)
          lastSpecialChar = '\0'
      case c
      of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
        yieldToken(tAlias)
        state = ylInitialInLine
        continue
      of ':':
        lastSpecialChar = ':'
      of ',':
        if flowDepth > 0:
          yieldToken(tAlias)
          state = ylInitialInLine
          continue
        my.content.add(c)
      else:
        my.content.add(c)
    # consume the current character
    my.bufpos += my.charlen
    curPos.inc