# NimYAML/src/private/lexer.nim
#
# file must be included from yaml.nim and cannot compile on its own
type
  # Encodings the lexer tells apart when looking at the start of the input.
  Encoding = enum
    Unsupported, ## Unsupported encoding
    UTF8,        ## UTF-8
    UTF16LE,     ## UTF-16 Little Endian
    UTF16BE,     ## UTF-16 Big Endian
    UTF32LE,     ## UTF-32 Little Endian
    UTF32BE      ## UTF-32 Big Endian

  # Kinds of tokens produced by the `tokens` iterator.
  YamlLexerToken = enum
    # separating tokens
    tDirectivesEnd,
    tDocumentEnd,
    tStreamEnd,
    # tokens only in directives
    tTagDirective,
    tYamlDirective,
    tUnknownDirective,
    tVersionPart,
    tTagURI,
    tUnknownDirectiveParam,
    # tokens in directives and content
    tTagHandle,
    tComment,
    # from here on tokens only in content
    tLineStart,
    # control characters
    tColon,
    tDash,
    tQuestionmark,
    tComma,
    tOpeningBrace,
    tOpeningBracket,
    tClosingBrace,
    tClosingBracket,
    tPipe,
    tGreater,
    # block scalar header
    tBlockIndentationIndicator,
    tPlus,
    # scalar content
    tScalar,
    tScalarPart,
    # tags
    tVerbatimTag,
    tTagSuffix,
    # anchoring
    tAnchor,
    tAlias,
    # error reporting
    tError

  # States of the lexer's state machine.
  YamlLexerState = enum
    # initial states (not started reading any token)
    ylInitial,
    ylInitialSpaces,
    ylInitialUnknown,
    ylInitialContent,
    ylDefineTagHandleInitial,
    ylDefineTagURIInitial,
    ylInitialInLine,
    ylLineEnd,
    ylDirectiveLineEnd,
    # directive reading states
    ylDirective,
    ylDefineTagHandle,
    ylDefineTagURI,
    ylMajorVersion,
    ylMinorVersion,
    ylUnknownDirectiveParam,
    ylDirectiveComment,
    # scalar reading states
    ylPlainScalar,
    ylPlainScalarNone,
    ylSingleQuotedScalar,
    ylDoublyQuotedScalar,
    ylEscape,
    ylBlockScalar,
    ylBlockScalarHeader,
    ylSpaceAfterPlainScalar,
    ylSpaceAfterQuotedScalar,
    # indentation
    ylIndentation,
    # comments
    ylComment,
    # tags
    ylTagHandle,
    ylTagSuffix,
    ylVerbatimTag,
    # document separation
    ylDashes,
    ylDots,
    # anchoring
    ylAnchor,
    ylAlias

  # States of the small automaton that recognises `null`, `true`, `false`
  # and numbers while a plain scalar is being read. Each state names the
  # prefix seen so far; `ythNone` means no hint applies any more.
  YamlTypeHintState = enum
    ythInitial,
    ythN, ythNU, ythNUL, ythNULL,
    ythF, ythFA, ythFAL, ythFALS, ythFALSE,
    ythT, ythTR, ythTRU, ythTRUE,
    ythMinus, yth0, ythInt, ythDecimal,
    ythNumE, ythNumEPlusMinus, ythExponent,
    ythNone

  # Lexer object; extends lexbase's BaseLexer with YAML specific state.
  YamlLexer = object of BaseLexer
    indentations: seq[int] # initialised to an empty seq by `open`
    encoding: Encoding     # result of `detect_encoding`
    charlen: int           # bytes the lexer advances per character
    charoffset: int        # index of the inspected byte inside a character
    content*: string       # content of the last returned token.
    line*: int
    column*: int
    typeHint*: YamlTypeHint # set by `yieldScalarPart` for plain scalars
const
  # UTF-8 encodings of the code points produced by the escape sequences
  # \N, \_, \L and \P inside double quoted scalars.
  UTF8NextLine = Rune(0x85).toUTF8()
  UTF8NonBreakingSpace = Rune(0xA0).toUTF8()
  UTF8LineSeparator = Rune(0x2028).toUTF8()
  UTF8ParagraphSeparator = Rune(0x2029).toUTF8()
proc detect_encoding(my: var YamlLexer) =
  ## Determines the encoding of the input from the first bytes of the buffer
  ## and stores it in `my.encoding`, skips a byte order mark if one is
  ## present and derives `my.charlen` and `my.charoffset` from the result.
  ##
  ## The byte patterns follow the table in section 5.2 ("Character
  ## Encodings") of the YAML 1.2 specification:
  ##
  ## * `00 00 FE FF` (BOM) or `00 00 00 xx`  -> UTF-32BE
  ## * `FF FE 00 00` (BOM) or `xx 00 00 00`  -> UTF-32LE
  ## * `FE FF` (BOM) or `00 xx`              -> UTF-16BE
  ## * `FF FE` (BOM) or `xx 00`              -> UTF-16LE
  ## * anything else                         -> UTF-8
  ##
  ## NOTE(review): the first four buffer bytes are inspected regardless of
  ## the input's length; presumably inputs shorter than four bytes can be
  ## misdetected - confirm against BaseLexer's buffer handling.
  var numBomChars = 0
  if my.bufpos == 3:
    # BaseLexer already skipped UTF-8 BOM
    my.encoding = UTF8
  else:
    let
      b0 = my.buf[0]
      b1 = my.buf[1]
      b2 = my.buf[2]
      b3 = my.buf[3]
    # Explicit byte order marks come first; the four byte marks must be
    # tested before the two byte marks they start with.
    if b0 == '\0' and b1 == '\0' and b2 == '\xFE' and b3 == '\xFF':
      my.encoding = UTF32BE
      numBomChars = 4
    elif b0 == '\xFF' and b1 == '\xFE' and b2 == '\0' and b3 == '\0':
      my.encoding = UTF32LE
      numBomChars = 4
    elif b0 == '\xFE' and b1 == '\xFF':
      my.encoding = UTF16BE
      numBomChars = 2
    elif b0 == '\xFF' and b1 == '\xFE':
      my.encoding = UTF16LE
      numBomChars = 2
    # No byte order mark: the position of the zero bytes around the first
    # (ASCII) character tells width and byte order.
    elif b0 == '\0' and b1 == '\0' and b2 == '\0':
      # three leading zero bytes: the significant byte comes last
      my.encoding = UTF32BE
    elif b0 == '\0':
      # this is how a BOM-less UTF16BE input should actually look like;
      # `00 00` followed by something else is probably not a unicode
      # character stream, but we just use the next match in the table
      my.encoding = UTF16BE
    elif b1 == '\0' and b2 == '\0' and b3 == '\0':
      # three trailing zero bytes: the significant byte comes first
      my.encoding = UTF32LE
    elif b1 == '\0':
      my.encoding = UTF16LE
    else:
      my.encoding = UTF8
  inc(my.bufPos, numBomChars)
  # number of bytes per character
  my.charlen =
    case my.encoding
    of UTF8, Unsupported: 1
    of UTF16LE, UTF16BE: 2
    of UTF32LE, UTF32BE: 4
  # index of the least significant byte inside a character
  my.charoffset =
    case my.encoding
    of UTF8, Unsupported, UTF16LE, UTF32LE: 0
    of UTF16BE: 1
    of UTF32BE: 3
proc open(my: var YamlLexer, input: Stream) =
  ## Prepares `my` for lexing `input`: opens the underlying BaseLexer,
  ## detects the input's encoding and resets all YAML specific state.
  lexbase.open(my, input)
  # the encoding is read from the buffer, so the base lexer must be open
  my.detect_encoding()
  my.indentations = @[]
  my.content = ""
  my.line = 0
  my.column = 0
template yieldToken(kind: YamlLexerToken) {.dirty.} =
  ## Yields `kind` from the enclosing iterator and clears `my.content`
  ## afterwards. When compiled with `-d:yamlDebug` the token is echoed
  ## first; scalars are echoed together with their content.
  when defined(yamlDebug):
    case kind
    of tScalar:
      echo "Lexer token: tScalar(\"", my.content, "\")"
    else:
      echo "Lexer token: ", kind
  yield kind
  my.content = ""
template yieldScalarPart() {.dirty.} =
  ## Translates the current `typeHintState` into `my.typeHint`, yields
  ## `tScalarPart` from the enclosing iterator and clears `my.content`
  ## afterwards. Only completely recognised values give a specific hint;
  ## everything else is hinted as a string.
  my.typeHint =
    case typeHintState
    of ythNULL: yTypeNull
    of ythTRUE, ythFALSE: yTypeBoolean
    of ythInt, yth0: yTypeInteger
    of ythDecimal, ythExponent: yTypeFloat
    else: yTypeString
  when defined(yamlDebug):
    echo "Lexer token: tScalarPart(\"", my.content, "\".", my.typeHint,
         ")"
  yield tScalarPart
  my.content = ""
template yieldLexerError(message: string) {.dirty.} =
  ## Reports an error to the consumer: `message` is put into `my.content`,
  ## `tError` is yielded from the enclosing iterator and `my.content` is
  ## cleared afterwards. With `-d:yamlDebug` the message is echoed as well.
  when defined(yamlDebug):
    echo "Lexer error: ", message
  my.content = message
  yield tError
  my.content = ""
template handleCR() {.dirty.} =
  ## Lets BaseLexer process the carriage return at the current position,
  ## moves `my.bufpos` behind it, counts the new line and resets `curPos`.
  ## BaseLexer is handed the index of the inspected byte; its result is
  ## shifted by the remaining bytes of the character.
  my.bufpos = lexbase.handleCR(my, my.bufpos + my.charoffset) +
              (my.charlen - my.charoffset - 1)
  inc(my.line)
  curPos = 0
template handleLF() {.dirty.} =
  ## Lets BaseLexer process the line feed at the current position, moves
  ## `my.bufpos` behind it, counts the new line and resets `curPos`.
  ## BaseLexer is handed the index of the inspected byte; its result is
  ## shifted by the remaining bytes of the character.
  my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) +
              (my.charlen - my.charoffset - 1)
  inc(my.line)
  curPos = 0
template `or`(r: Rune, i: int): Rune =
  ## Bitwise or of a rune's code point with `i`, yielding a rune again.
  ## Used to assemble a code point from the digits of an escape sequence.
  cast[Rune](i or cast[int](r))
template advanceTypeHint(ch: char) {.dirty.} =
  ## Advances the type hint automaton (`typeHintState`) by the character
  ## `ch` of a plain scalar. As soon as the characters read so far cannot
  ## be a prefix of `null`, `true`, `false` or a number any more, the
  ## automaton ends in `ythNone` and the lexer `state` is switched to
  ## `ylPlainScalarNone`.
  case ch
  of '.':
    case typeHintState
    of yth0, ythInt: typeHintState = ythDecimal
    else: typeHintState = ythNone
  of '+':
    case typeHintState
    of ythNumE: typeHintState = ythNumEPlusMinus
    else: typeHintState = ythNone
  of '-':
    case typeHintState
    of ythInitial: typeHintState = ythMinus
    of ythNumE: typeHintState = ythNumEPlusMinus
    else: typeHintState = ythNone
  of '0':
    case typeHintState
    of ythInitial, ythMinus: typeHintState = yth0
    of ythNumE, ythNumEPlusMinus: typeHintState = ythExponent
    of ythInt, ythDecimal, ythExponent: discard
    else: typeHintState = ythNone
  of '1'..'9':
    case typeHintState
    of ythInitial, ythMinus: typeHintState = ythInt
    of ythNumE, ythNumEPlusMinus: typeHintState = ythExponent
    of ythInt, ythDecimal, ythExponent: discard
    else: typeHintState = ythNone
  of 'a':
    case typeHintState
    of ythF: typeHintState = ythFA
    else: typeHintState = ythNone
  of 'e':
    case typeHintState
    of yth0, ythInt, ythDecimal: typeHintState = ythNumE
    of ythTRU: typeHintState = ythTRUE
    of ythFALS: typeHintState = ythFALSE
    else: typeHintState = ythNone
  of 'E':
    case typeHintState
    of yth0, ythInt, ythDecimal: typeHintState = ythNumE
    else: typeHintState = ythNone
  of 'f':
    case typeHintState
    of ythInitial: typeHintState = ythF
    else: typeHintState = ythNone
  of 'l':
    case typeHintState
    of ythNU: typeHintState = ythNUL
    of ythNUL: typeHintState = ythNULL
    of ythFA: typeHintState = ythFAL
    else: typeHintState = ythNone
  of 'n':
    case typeHintState
    of ythInitial: typeHintState = ythN
    else: typeHintState = ythNone
  of 'r':
    case typeHintState
    of ythT: typeHintState = ythTR
    else: typeHintState = ythNone
  of 's':
    case typeHintState
    of ythFAL: typeHintState = ythFALS
    else: typeHintState = ythNone
  of 't':
    case typeHintState
    of ythInitial: typeHintState = ythT
    else: typeHintState = ythNone
  of 'u':
    case typeHintState
    of ythN: typeHintState = ythNU
    of ythTR: typeHintState = ythTRU
    else: typeHintState = ythNone
  else:
    typeHintState = ythNone
  # ythNone is only ever reached through one of the rejecting branches
  # above, so this single check covers all of them.
  if typeHintState == ythNone:
    state = ylPlainScalarNone
iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
  ## Splits the input of `my` into YAML lexer tokens.
  ##
  ## The iterator is a state machine that inspects one character per loop
  ## iteration. A branch that ends with `continue` hands the *same*
  ## character to the next state; all other branches fall through to the
  ## bottom of the loop, which advances to the next character.
  ##
  ## For each yielded token, `my.content` holds the token's text while the
  ## consumer runs and is cleared when the iterator is resumed. Errors are
  ## reported as `tError` tokens with the message in `my.content`. The
  ## iterator finishes after yielding `tStreamEnd`.
  var
    # the following three values are used for parsing escaped unicode chars
    unicodeChar: Rune = cast[Rune](0)
    escapeLength = 0
    expectedEscapeLength = 0

    # used to temporarily store whitespace after a plain scalar
    trailingSpace = ""

    # stores chars that behave differently dependent on the following
    # char. handling will be deferred to next loop iteration.
    lastSpecialChar: char = '\0'

    # Lexer must know whether it parses block or flow style. Therefore,
    # it counts the number of open flow arrays / maps here
    flowDepth = 0

    state = ylInitial # lexer state
    typeHintState = ythInitial # for giving type hints of plain scalars

    # after parsing the indentation of the line, this will hold the
    # indentation length of the current line. Needed for checking where
    # a block scalar ends.
    lastIndentationLength = 0

    # when parsing a block scalar, this will be set to the indentation
    # of the line that starts the block scalar.
    blockScalarIndentation = -1

    # position inside the current line, used to compute my.column
    curPos = 0

  while true:
    let c = my.buf[my.bufpos + my.charoffset]
    case state
    of ylInitial:
      case c
      of '%':
        state = ylDirective
        continue
      of ' ', '\t':
        state = ylInitialSpaces
        continue
      of '#':
        state = ylDirectiveComment
      else:
        state = ylInitialContent
        continue
    of ylInitialSpaces:
      case c
      of ' ', '\t':
        my.content.add(c)
      of '#':
        my.content = ""
        state = ylDirectiveComment
      of EndOfFile, '\r', '\x0A':
        state = ylDirectiveLineEnd
        continue
      else:
        state = ylIndentation
        continue
    of ylInitialContent:
      case c
      of '-':
        my.column = 0
        state = ylDashes
        continue
      of '.':
        yieldToken(tLineStart)
        my.column = 0
        state = ylDots
        continue
      else:
        state = ylIndentation
        continue
    of ylDashes:
      case c
      of '-':
        my.content.add(c)
      of ' ', '\t', '\r', '\x0A', EndOfFile:
        case my.content.len
        of 3:
          yieldToken(tDirectivesEnd)
          state = ylInitialInLine
        of 1:
          # a single dash is a sequence indicator; ylInitialInLine decides
          my.content = ""
          yieldToken(tLineStart)
          lastSpecialChar = '-'
          state = ylInitialInLine
        else:
          # any other number of dashes starts a plain scalar
          let tmp = my.content
          my.content = ""
          yieldToken(tLineStart)
          my.content = tmp
          my.column = curPos
          state = ylPlainScalarNone
          typeHintState = ythNone
        continue
      else:
        let tmp = my.content
        my.content = ""
        yieldToken(tLineStart)
        my.content = tmp
        if my.content.len == 1:
          # might be the sign of a number
          typeHintState = ythMinus
          state = ylPlainScalar
        else:
          typeHintState = ythNone
          state = ylPlainScalarNone
        continue
    of ylDots:
      case c
      of '.':
        my.content.add(c)
      of ' ', '\t', '\r', '\x0A', EndOfFile:
        case my.content.len
        of 3:
          yieldToken(tDocumentEnd)
          state = ylDirectiveLineEnd
        else:
          state = ylPlainScalarNone
          typeHintState = ythNone
        continue
      else:
        state = ylPlainScalarNone
        typeHintState = ythNone
        continue
    of ylDirectiveLineEnd:
      case c
      of '\r':
        handleCR()
        state = ylInitial
        continue
      of '\x0A':
        handleLF()
        state = ylInitial
        continue
      of EndOfFile:
        yieldToken(tStreamEnd)
        break
      of ' ', '\t':
        discard
      of '#':
        state = ylDirectiveComment
      else:
        yieldLexerError("Unexpected content at end of directive: " & c)
    of ylLineEnd:
      case c
      of '\r':
        handleCR()
      of '\x0A':
        handleLF()
      of EndOfFile:
        yieldToken(tStreamEnd)
        break
      else:
        yieldLexerError("Internal error: Unexpected char at line end: " & c)
      state = ylInitialContent
      continue
    of ylSingleQuotedScalar:
      if lastSpecialChar != '\0':
        # ' is the only special char
        case c
        of '\'':
          # two quotes in a row stand for one literal quote
          my.content.add(c)
          lastSpecialChar = '\0'
        of EndOfFile, '\r', '\x0A':
          yieldToken(tScalar)
          lastSpecialChar = '\0'
          state = ylLineEnd
          continue
        else:
          yieldToken(tScalar)
          lastSpecialChar = '\0'
          state = ylSpaceAfterQuotedScalar
          continue
      else:
        case c
        of '\'':
          lastSpecialChar = c
        of EndOfFile:
          yieldLexerError("Unterminated single quoted string")
          yieldToken(tStreamEnd)
          break
        else:
          my.content.add(c)
    of ylDoublyQuotedScalar:
      case c
      of '"':
        yieldToken(tScalar)
        state = ylSpaceAfterQuotedScalar
      of EndOfFile:
        yieldLexerError("Unterminated doubly quoted string")
        yieldToken(tStreamEnd)
        break
      of '\\':
        state = ylEscape
        escapeLength = 0
      of '\r':
        my.content.add("\x0A")
        handleCR()
        # handleCR already moved to the first char of the next line; do
        # not advance again or that char would be lost
        continue
      of '\x0A':
        my.content.add(c)
        handleLF()
        # see above: the position already is at the next line's first char
        continue
      else:
        my.content.add(c)
    of ylEscape:
      if escapeLength == 0:
        # c is the char directly after the backslash
        expectedEscapeLength = 0
        case c
        of EndOfFile:
          # Do not step over the end of the input. The doubly quoted
          # scalar state sees the same char, reports the unterminated
          # string and ends the stream.
          state = ylDoublyQuotedScalar
          continue
        of '0': my.content.add('\0')
        of 'a': my.content.add('\x07')
        of 'b': my.content.add('\x08')
        of '\t', 't': my.content.add('\t')
        of 'n': my.content.add('\x0A')
        of 'v': my.content.add('\v')
        of 'f': my.content.add('\f')
        of 'r': my.content.add('\r')
        of 'e': my.content.add('\e')
        of ' ': my.content.add(' ')
        of '"': my.content.add('"')
        of '/': my.content.add('/')
        of '\\': my.content.add('\\')
        of 'N': my.content.add(UTF8NextLine)
        of '_': my.content.add(UTF8NonBreakingSpace)
        of 'L': my.content.add(UTF8LineSeparator)
        of 'P': my.content.add(UTF8ParagraphSeparator)
        # the expected lengths include the introducing letter
        of 'x': unicodeChar = cast[Rune](0); expectedEscapeLength = 3
        of 'u': unicodeChar = cast[Rune](0); expectedEscapeLength = 5
        of 'U': unicodeChar = cast[Rune](0); expectedEscapeLength = 9
        else:
          yieldLexerError("Unsupported escape sequence: \\" & c)
        if expectedEscapeLength == 0: state = ylDoublyQuotedScalar
      else:
        # c is a hex digit; digits arrive most significant first
        let digitPosition = expectedEscapeLength - escapeLength - 1
        case c
        of EndOfFile:
          yieldLexerError("Unterminated escape sequence")
          state = ylLineEnd
          continue
        of '0' .. '9':
          unicodeChar = unicodeChar or
              (cast[int](c) - 0x30) shl (digitPosition * 4)
        of 'A' .. 'F':
          unicodeChar = unicodeChar or
              (cast[int](c) - 0x37) shl (digitPosition * 4)
        of 'a' .. 'f':
          unicodeChar = unicodeChar or
              (cast[int](c) - 0x57) shl (digitPosition * 4)
        else:
          yieldLexerError("unsupported char in unicode escape sequence: " &
                          c)
          escapeLength = 0
          state = ylDoublyQuotedScalar
          continue
      inc(escapeLength)
      if escapeLength == expectedEscapeLength and escapeLength > 0:
        my.content.add(toUTF8(unicodeChar))
        state = ylDoublyQuotedScalar
    of ylSpaceAfterQuotedScalar:
      case c
      of ' ', '\t':
        trailingSpace.add(c)
      of '#':
        # a comment must be separated from the scalar by whitespace
        if trailingSpace.len == 0:
          yieldLexerError("Missing space before comment start")
        state = ylComment
        trailingSpace = ""
      else:
        trailingSpace = ""
        state = ylInitialInLine
        continue
    of ylPlainScalar:
      case c
      of EndOfFile, '\r', '\x0A':
        yieldScalarPart()
        state = ylLineEnd
        continue
      of ':':
        lastSpecialChar = c
        state = ylSpaceAfterPlainScalar
      of ' ':
        state = ylSpaceAfterPlainScalar
        continue
      of ',':
        if flowDepth > 0:
          lastSpecialChar = c
          state = ylSpaceAfterPlainScalar
        else:
          my.content.add(c)
          state = ylPlainScalarNone
          typeHintState = ythNone
      of '[', ']', '{', '}':
        yieldScalarPart()
        state = ylInitialInLine
        continue
      else:
        advanceTypeHint(c)
        my.content.add(c)
    of ylPlainScalarNone:
      # same as ylPlainScalar, but the type hint is already known to be
      # ythNone, so it is not advanced any more
      case c
      of EndOfFile, '\r', '\x0A':
        yieldScalarPart()
        state = ylLineEnd
        continue
      of ':':
        lastSpecialChar = c
        state = ylSpaceAfterPlainScalar
      of ' ':
        state = ylSpaceAfterPlainScalar
        continue
      of ',':
        if flowDepth > 0:
          lastSpecialChar = c
          state = ylSpaceAfterPlainScalar
        else:
          my.content.add(c)
      of '[', ']', '{', '}':
        yieldScalarPart()
        state = ylInitialInLine
        continue
      else:
        my.content.add(c)
    of ylSpaceAfterPlainScalar:
      if lastSpecialChar != '\0':
        case c
        of ' ', '\t', EndOfFile, '\r', '\x0A':
          # the deferred char really is an indicator: the scalar ends
          # here. Drop the collected whitespace so that it cannot leak
          # into the next plain scalar.
          trailingSpace = ""
          yieldScalarPart()
          state = ylInitialInLine
        else:
          # the deferred char is part of the scalar
          my.content.add(trailingSpace)
          my.content.add(lastSpecialChar)
          lastSpecialChar = '\0'
          trailingSpace = ""
          state = ylPlainScalarNone
          typeHintState = ythNone
        continue
      case c
      of EndOfFile, '\r', '\x0A':
        trailingSpace = ""
        yieldScalarPart()
        state = ylLineEnd
        continue
      of ' ', '\t':
        trailingSpace.add(c)
      of ',':
        if flowDepth > 0:
          lastSpecialChar = c
        else:
          my.content.add(trailingSpace)
          my.content.add(c)
          trailingSpace = ""
          state = ylPlainScalarNone
          typeHintState = ythNone
      of ':', '#':
        lastSpecialChar = c
      of '[', ']', '{', '}':
        # a plain scalar ends here, so it is yielded like at every other
        # end of a plain scalar
        yieldScalarPart()
        trailingSpace = ""
        state = ylInitialInLine
        continue
      else:
        my.content.add(trailingSpace)
        my.content.add(c)
        trailingSpace = ""
        state = ylPlainScalarNone
        typeHintState = ythNone
    of ylInitialInLine:
      if lastSpecialChar != '\0':
        # resolve the char deferred in the previous iteration
        my.column = curPos - 1
        case c
        of ' ', '\t', '\r', '\x0A', EndOfFile:
          case lastSpecialChar
          of '#':
            my.content = "#"
            state = ylComment
          of ':':
            yieldToken(tColon)
          of '?':
            yieldToken(tQuestionmark)
          of '-':
            yieldToken(tDash)
          of ',':
            yieldToken(tComma)
          of '!':
            # a lone "!": primary tag handle with empty suffix
            my.content = "!"
            yieldToken(tTagHandle)
            my.content = ""
            yieldToken(tTagSuffix)
          else:
            yieldLexerError("Unexpected special char: \"" &
                            lastSpecialChar & "\"")
          lastSpecialChar = '\0'
        elif lastSpecialChar == '!':
          case c
          of '<':
            state = ylVerbatimTag
            lastSpecialChar = '\0'
            # skip the '<', it is not part of the tag
            my.bufpos += my.charlen
          else:
            state = ylTagHandle
            my.content = "!"
            lastSpecialChar = '\0'
        else:
          # not followed by whitespace: the char starts a plain scalar
          my.content.add(lastSpecialChar)
          my.column = curPos - 1
          state = ylPlainScalar
          typeHintState = ythInitial
          # the type hint must see the deferred char itself, so it is fed
          # before the char is cleared
          advanceTypeHint(lastSpecialChar)
          lastSpecialChar = '\0'
        continue
      case c
      of '\r', '\x0A', EndOfFile:
        state = ylLineEnd
        continue
      of ',':
        if flowDepth > 0:
          yieldToken(tComma)
        else:
          my.content = $c
          my.column = curPos
          state = ylPlainScalar
          typeHintState = ythInitial
          advanceTypeHint(c)
      of '[':
        inc(flowDepth)
        yieldToken(tOpeningBracket)
      of '{':
        inc(flowDepth)
        yieldToken(tOpeningBrace)
      of ']':
        yieldToken(tClosingBracket)
        if flowDepth > 0:
          dec(flowDepth)
      of '}':
        yieldToken(tClosingBrace)
        if flowDepth > 0:
          dec(flowDepth)
      of '#':
        lastSpecialChar = '#'
      of '"':
        my.column = curPos
        state = ylDoublyQuotedScalar
      of '\'':
        my.column = curPos
        state = ylSingleQuotedScalar
      of '!':
        my.column = curPos
        lastSpecialChar = '!'
      of '&':
        my.column = curPos
        state = ylAnchor
      of '*':
        my.column = curPos
        state = ylAlias
      of ' ':
        discard
      of '-':
        if flowDepth == 0:
          lastSpecialChar = '-'
        else:
          my.content = $c
          my.column = curPos
          state = ylPlainScalar
          typeHintState = ythInitial
          advanceTypeHint(c)
      of '?', ':':
        my.column = curPos
        lastSpecialChar = c
      of '|':
        yieldToken(tPipe)
        state = ylBlockScalarHeader
      of '>':
        yieldToken(tGreater)
        state = ylBlockScalarHeader
      of '\t':
        discard
      else:
        my.content = $c
        my.column = curPos
        state = ylPlainScalar
        typeHintState = ythInitial
        advanceTypeHint(c)
    of ylComment, ylDirectiveComment:
      case c
      of EndOfFile, '\r', '\x0A':
        yieldToken(tComment)
        case state
        of ylComment:
          state = ylLineEnd
        of ylDirectiveComment:
          state = ylDirectiveLineEnd
        else:
          yieldLexerError("Should never happen")
        continue
      else:
        my.content.add(c)
    of ylIndentation:
      case c
      of EndOfFile, '\r', '\x0A':
        lastIndentationLength = my.content.len
        yieldToken(tLineStart)
        state = ylLineEnd
        continue
      of ' ':
        my.content.add(' ')
      else:
        lastIndentationLength = my.content.len
        yieldToken(tLineStart)
        if blockScalarIndentation != -1:
          if lastIndentationLength <= blockScalarIndentation:
            # line is not indented more than the block scalar's parent:
            # the block scalar has ended
            blockScalarIndentation = -1
          else:
            state = ylBlockScalar
            continue
        state = ylInitialInLine
        continue
    of ylTagHandle:
      case c
      of '!':
        my.content.add(c)
        yieldToken(tTagHandle)
        state = ylTagSuffix
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-':
        my.content.add(c)
      of ' ', '\t', EndOfFile, '\r', '\x0A':
        # no second '!' was found, so everything after the first '!' is
        # the suffix of the primary tag handle
        let suffix = my.content[1..^1]
        my.content = "!"
        yieldToken(tTagHandle)
        my.content = suffix
        yieldToken(tTagSuffix)
        state = ylInitialInLine
        continue
      else:
        yieldLexerError("Invalid character in tag handle: " & c)
        my.content = ""
        state = ylInitialInLine
    of ylTagSuffix:
      case c
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@',
         '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
        my.content.add(c)
      of ' ', '\t', EndOfFile, '\r', '\x0A':
        yieldToken(tTagSuffix)
        state = ylInitialInLine
        continue
      else:
        yieldLexerError("Invalid character in tag suffix: " & c)
        state = ylInitialInLine
    of ylVerbatimTag:
      case c
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@',
         '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
        my.content.add(c)
      of '>':
        yieldToken(tVerbatimTag)
        state = ylInitialInLine
      of EndOfFile, '\r', '\x0A':
        yieldLexerError("Unfinished verbatim tag")
        state = ylLineEnd
        continue
      else:
        yieldLexerError("Invalid character in tag URI: " & c)
        my.content = ""
        state = ylInitialInLine
    of ylDirective:
      case c
      of ' ', '\t', '\r', '\x0A', EndOfFile:
        if my.content == "%YAML":
          yieldToken(tYamlDirective)
          state = ylMajorVersion
        elif my.content == "%TAG":
          yieldToken(tTagDirective)
          state = ylDefineTagHandleInitial
        else:
          yieldToken(tUnknownDirective)
          state = ylInitialUnknown
        # Line breaks and the end of the input must not be stepped over
        # here; the following state sees them and routes them to the line
        # end handling, which keeps the line count and buffer correct.
        if c in {EndOfFile, '\r', '\x0A'}:
          continue
      else:
        my.content.add(c)
    of ylInitialUnknown:
      case c
      of ' ', '\t':
        discard
      of '\r', '\x0A', EndOfFile:
        state = ylDirectiveLineEnd
        continue
      of '#':
        state = ylDirectiveComment
        continue
      else:
        state = ylUnknownDirectiveParam
        continue
    of ylUnknownDirectiveParam:
      case c
      of '\r', '\x0A', EndOfFile, ' ', '\t':
        yieldToken(tUnknownDirectiveParam)
        state = ylInitialUnknown
        continue
      else:
        my.content.add(c)
    of ylMajorVersion:
      case c
      of '0' .. '9':
        my.content.add(c)
      of '.':
        yieldToken(tVersionPart)
        state = ylMinorVersion
      of EndOfFile, '\r', '\x0A', ' ', '\t':
        yieldLexerError("Missing YAML minor version.")
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Invalid character in YAML version: " & c)
        state = ylInitialUnknown
    of ylMinorVersion:
      case c
      of '0' .. '9':
        my.content.add(c)
      of EndOfFile, '\r', '\x0A', ' ', '\t':
        yieldToken(tVersionPart)
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Invalid character in YAML version: " & c)
        state = ylInitialUnknown
    of ylDefineTagHandleInitial:
      case c
      of ' ', '\t':
        discard
      of EndOfFile, '\r', '\x0A':
        yieldLexerError("Unfinished %TAG directive")
        state = ylDirectiveLineEnd
        continue
      of '!':
        my.content.add(c)
        state = ylDefineTagHandle
      else:
        yieldLexerError("Unexpected character in %TAG directive: " & c)
        state = ylInitialInLine
    of ylDefineTagHandle:
      case c
      of '!':
        my.content.add(c)
        yieldToken(tTagHandle)
        state = ylDefineTagURIInitial
      of 'a' .. 'z', 'A' .. 'Z', '-':
        my.content.add(c)
      of EndOfFile, '\r', '\x0A':
        yieldLexerError("Unfinished %TAG directive")
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Unexpected char in %TAG directive: " & c)
        state = ylInitialInLine
    of ylDefineTagURIInitial:
      case c
      of '\t', ' ':
        # collected only to check below that whitespace was present
        my.content.add(c)
      of '\x0A', '\r', EndOfFile:
        yieldLexerError("Unfinished %TAG directive")
        state = ylDirectiveLineEnd
        continue
      else:
        if my.content.len == 0:
          yieldLexerError("Missing whitespace in %TAG directive")
        my.content = ""
        state = ylDefineTagURI
        continue
    of ylDefineTagURI:
      case c
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@',
         '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
        my.content.add(c)
      of '\x0A', '\r', EndOfFile, ' ', '\t':
        yieldToken(tTagURI)
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Invalid URI character: " & c)
        state = ylInitialInLine
        continue
    of ylBlockScalarHeader:
      case c
      of '0' .. '9':
        my.content = $c
        yieldToken(tBlockIndentationIndicator)
      of '+':
        yieldToken(tPlus)
      of '-':
        yieldToken(tDash)
      of '\r', '\x0A', EndOfFile:
        # remember the indentation of the line with the header; the
        # scalar lasts as long as lines are indented more than that
        blockScalarIndentation = lastIndentationLength
        state = ylLineEnd
        continue
      else:
        yieldLexerError("Unexpected character in block scalar header: " & c)
    of ylBlockScalar:
      case c
      of EndOfFile, '\r', '\x0A':
        yieldScalarPart()
        state = ylLineEnd
        continue
      else:
        my.content.add(c)
    of ylAnchor:
      case c
      of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
        yieldToken(tAnchor)
        state = ylInitialInLine
        continue
      else:
        my.content.add(c)
    of ylAlias:
      if lastSpecialChar != '\0':
        # a ':' was deferred: it ends the alias only if it is followed by
        # whitespace or a flow indicator, otherwise it belongs to the name
        case c
        of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
          yieldToken(tAlias)
          state = ylInitialInLine
          continue
        else:
          my.content.add(lastSpecialChar)
          lastSpecialChar = '\0'
      case c
      of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
        yieldToken(tAlias)
        state = ylInitialInLine
        continue
      of ':':
        lastSpecialChar = ':'
      of ',':
        if flowDepth > 0:
          yieldToken(tAlias)
          state = ylInitialInLine
          continue
        my.content.add(c)
      else:
        my.content.add(c)
    # advance to the next character
    my.bufpos += my.charlen
    curPos.inc