#            NimYAML - YAML implementation in Nim
#        (c) Copyright 2015 Felix Krause
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.

type
  # Character encoding of the input, as determined by `detect_encoding`.
  Encoding = enum
    Unsupported, ## Unsupported encoding
    UTF8,        ## UTF-8
    UTF16LE,     ## UTF-16 Little Endian
    UTF16BE,     ## UTF-16 Big Endian
    UTF32LE,     ## UTF-32 Little Endian
    UTF32BE      ## UTF-32 Big Endian

  # Kinds of tokens yielded by the `tokens` iterator. The text belonging to
  # a token is available in `YamlLexer.content` while the token is current.
  YamlLexerToken = enum
    # separating tokens
    tDirectivesEnd, tDocumentEnd, tStreamEnd,
    # tokens only in directives
    tTagDirective, tYamlDirective, tUnknownDirective,
    tVersionPart, tTagURI,
    tUnknownDirectiveParam,
    # tokens in directives and content
    tTagHandle, tComment,
    # from here on tokens only in content
    tLineStart,
    # control characters
    tColon, tDash, tQuestionmark, tComma, tOpeningBrace,
    tOpeningBracket, tClosingBrace, tClosingBracket, tPipe, tGreater,
    # block scalar header
    tBlockIndentationIndicator, tPlus,
    # scalar content
    tScalar, tScalarPart,
    # tags
    tVerbatimTag, tTagSuffix,
    # anchoring
    tAnchor, tAlias,
    # error reporting
    tError

  # States of the character-driven state machine inside `tokens`.
  YamlLexerState = enum
    # initial states (not started reading any token)
    ylInitial, ylInitialSpaces, ylInitialUnknown, ylInitialContent,
    ylDefineTagHandleInitial, ylDefineTagURIInitial, ylInitialInLine,
    ylLineEnd, ylDirectiveLineEnd,
    # directive reading states
    ylDirective, ylDefineTagHandle, ylDefineTagURI, ylMajorVersion,
    ylMinorVersion, ylUnknownDirectiveParam, ylDirectiveComment,
    # scalar reading states
    ylPlainScalar, ylPlainScalarNone, ylSingleQuotedScalar,
    ylDoublyQuotedScalar, ylEscape, ylBlockScalar, ylBlockScalarHeader,
    ylSpaceAfterPlainScalar, ylSpaceAfterQuotedScalar,
    # indentation
    ylIndentation,
    # comments
    ylComment,
    # tags
    ylTagHandle, ylTagSuffix, ylVerbatimTag,
    # document separation
    ylDashes, ylDots,
    # anchoring
    ylAnchor, ylAlias

  # States of the secondary state machine that tracks, character by
  # character, which well-known literal (null, booleans, numbers, .inf,
  # .nan) a plain scalar still matches. `ythNone` means "no match".
  YamlTypeHintState = enum
    ythInitial,
    ythF, ythFA, ythFAL, ythFALS, ythFALSE,
    ythN, ythNU, ythNUL, ythNULL,
    ythNO,
    ythO, ythON,
    ythOF, ythOFF,
    ythT, ythTR, ythTRU, ythTRUE,
    ythY, ythYE, ythYES,

    ythPoint, ythPointI, ythPointIN, ythPointINF,
    ythPointN, ythPointNA, ythPointNAN,

    ythLowerFA, ythLowerFAL, ythLowerFALS,
    ythLowerNU, ythLowerNUL,
    ythLowerOF,
    ythLowerTR, ythLowerTRU,
    ythLowerYE,

    ythPointLowerIN, ythPointLowerN, ythPointLowerNA,

    ythMinus, yth0, ythInt, ythDecimal, ythNumE, ythNumEPlusMinus,
    ythExponent,

    ythNone

  # Lexer object; extends `BaseLexer` with encoding information and the
  # data describing the most recently yielded token.
  YamlLexer = object of BaseLexer
    indentations: seq[int]
    encoding: Encoding
    charlen: int    # number of bytes per character unit in `encoding`
    charoffset: int # index, within a character unit, of the inspected byte
    content*: string # my.content of the last returned token.
    line*, column*: int
    typeHint*: YamlTypeHint

const
  # UTF-8 encodings of the characters produced by the escape sequences
  # \N, \_, \L and \P inside doubly quoted scalars.
  UTF8NextLine           = toUTF8(Rune(0x85))
  UTF8NonBreakingSpace   = toUTF8(Rune(0xA0))
  UTF8LineSeparator      = toUTF8(Rune(0x2028))
  UTF8ParagraphSeparator = toUTF8(Rune(0x2029))

proc detect_encoding(my: var YamlLexer) =
  ## Inspects the first bytes of the buffer to determine the input encoding,
  ## skips a byte order mark if one is found and derives `charlen` and
  ## `charoffset` from the result.
  var numBomChars = 0
  my.encoding = Unsupported
  if my.bufpos == 3:
    # BaseLexer already skipped UTF-8 BOM
    my.encoding = UTF8
  else:
    case my.buf[0]
    of '\0':
      if my.buf[1] == '\0':
        if my.buf[2] == '\0':
          # 00 00 00 xx is a BOM-less UTF-32 *big* endian stream: the
          # significant byte of an ASCII character comes last.
          my.encoding = UTF32BE
        elif my.buf[2] == '\xFE' and my.buf[3] == '\xFF':
          my.encoding = UTF32BE
          numBomChars = 4
        else:
          # this is probably not a unicode character stream,
          # but we just use the next match in the table
          my.encoding = UTF16BE
      else:
        # this is how a BOM-less UTF16BE input should actually look like
        my.encoding = UTF16BE
    of '\xFF':
      case my.buf[1]
      of '\xFE':
        if my.buf[2] == '\0' and my.buf[3] == '\0':
          my.encoding = UTF32LE
          numBomChars = 4
        else:
          my.encoding = UTF16LE
          numBomChars = 2
      of '\0':
        my.encoding = UTF16LE
      else:
        my.encoding = UTF8
    of '\xFE':
      case my.buf[1]
      of '\xFF':
        my.encoding = UTF16BE
        numBomChars = 2
      of '\0':
        my.encoding = UTF16LE
      else:
        my.encoding = UTF8
    else:
      if my.buf[1] == '\0':
        my.encoding = UTF16LE
      else:
        my.encoding = UTF8
  inc(my.bufPos, numBomChars)
  my.charlen = case my.encoding
    of UTF8, Unsupported: 1
    of UTF16LE, UTF16BE: 2
    of UTF32LE, UTF32BE: 4
  my.charoffset = case my.encoding
    of UTF8, Unsupported, UTF16LE, UTF32LE: 0
    of UTF16BE: 1
    of UTF32BE: 3

proc open(my: var YamlLexer, input: Stream) =
  ## Attaches the lexer to `input`, detects the encoding and resets the
  ## token content and the line / column counters.
  lexbase.open(my, input)
  my.indentations = newSeq[int]()
  my.detect_encoding()
  my.content = ""
  my.line = 1
  my.column = 1

# Yields `kind` from the enclosing iterator and clears `my.content`
# afterwards. With -d:yamlDebug the token is echoed first.
template yieldToken(kind: YamlLexerToken) {.dirty.} =
  when defined(yamlDebug):
    if kind == tScalar:
      echo "Lexer token: tScalar(\"", my.content, "\")"
    else:
      echo "Lexer token: ", kind
  yield kind
  my.content = ""

# Translates the current `typeHintState` into `my.typeHint`, yields
# `tScalarPart` and clears `my.content` afterwards.
template yieldScalarPart() {.dirty.} =
  case typeHintState
  of ythNULL:
    my.typeHint = yTypeNull
  of ythTRUE, ythON, ythYES, ythY:
    my.typeHint = yTypeBoolTrue
  of ythFALSE, ythOFF, ythNO, ythN:
    my.typeHint = yTypeBoolFalse
  of ythInt, yth0:
    my.typeHint = yTypeInteger
  of ythDecimal, ythExponent:
    my.typeHint = yTypeFloat
  of ythPointINF:
    my.typeHint = yTypeFloatInf
  of ythPointNAN:
    my.typeHint = yTypeFloatNaN
  else:
    my.typeHint = yTypeUnknown

  when defined(yamlDebug):
    echo "Lexer token: tScalarPart(\"", my.content, "\".",
         typeHintState, ")"
  yield tScalarPart
  my.content = ""

# Yields `tError` with `message` as token content and the current position
# as column; clears `my.content` afterwards.
template yieldLexerError(message: string) {.dirty.} =
  when defined(yamlDebug):
    echo "Lexer error: " & message
  my.content = message
  my.column = curPos
  yield tError
  my.content = ""

# Lets BaseLexer process a CR at the inspected byte. The arithmetic moves
# `my.bufpos` from the byte position returned by lexbase to the start of the
# next character unit, so callers must NOT advance `my.bufpos` again.
template handleCR() {.dirty.} =
  my.bufpos = lexbase.handleCR(my, my.bufpos + my.charoffset) +
      my.charlen - my.charoffset - 1
  my.line.inc()
  curPos = 1

# Same as `handleCR`, for LF.
template handleLF() {.dirty.} =
  my.bufpos = lexbase.handleLF(my, my.bufpos + my.charoffset) +
      my.charlen - my.charoffset - 1
  my.line.inc()
  curPos = 1

# Bitwise or of a Rune's code point with an integer.
template `or`(r: Rune, i: int): Rune =
  cast[Rune](cast[int](r) or i)

macro typeHintStateMachine(c: untyped, content: untyped): stmt =
  ## Builds a nested case statement from a transition table.
  ##
  ## `content` must be a list of `of <chars>:` branches whose bodies consist
  ## of rules `<state> => <newState>` or `[<state>, ...] => <newState>`.
  ## The result is `case c` over the characters; each character branch
  ## contains a `case typeHintState` that assigns the new state for the
  ## listed source states (or does nothing if the target is `nil`).
  ## Every combination not listed sets `typeHintState = ythNone` and
  ## `state = ylPlainScalarNone`.
  assert content.kind == nnkStmtList
  result = newNimNode(nnkCaseStmt, content).add(copyNimNode(c))
  for branch in content.children:
    assert branch.kind == nnkOfBranch
    var
      charBranch = newNimNode(nnkOfBranch, branch)
      i = 0
      stateBranches = newNimNode(nnkCaseStmt, branch).add(
          newIdentNode("typeHintState"))
    # all children preceding the statement list are the matched characters
    while branch[i].kind != nnkStmtList:
      charBranch.add(copyNimTree(branch[i]))
      inc(i)
    for rule in branch[i].children:
      assert rule.kind == nnkInfix
      assert ($rule[0].ident == "=>")
      var stateBranch = newNimNode(nnkOfBranch, rule)
      case rule[1].kind
      of nnkBracket:
        for item in rule[1].children:
          stateBranch.add(item)
      of nnkIdent:
        stateBranch.add(rule[1])
      else:
        assert false
      if rule[2].kind == nnkNilLit:
        # `=> nil` keeps the current state
        stateBranch.add(newStmtList(newNimNode(nnkDiscardStmt).add(
            newEmptyNode())))
      else:
        stateBranch.add(newStmtList(newAssignment(
            newIdentNode("typeHintState"), copyNimTree(rule[2]))))
      stateBranches.add(stateBranch)
    stateBranches.add(newNimNode(nnkElse).add(newStmtList(newAssignment(
        newIdentNode("typeHintState"), newIdentNode("ythNone")),
        newAssignment(newIdentNode("state"),
                      newIdentNode("ylPlainScalarNone")))))
    charBranch.add(newStmtList(stateBranches))
    result.add(charBranch)
  result.add(newNimNode(nnkElse).add(newStmtList(newAssignment(
      newIdentNode("typeHintState"), newIdentNode("ythNone")),
      newAssignment(newIdentNode("state"),
                    newIdentNode("ylPlainScalarNone")))))

# Advances `typeHintState` by the character `ch`. Transitions not listed
# here switch to `ythNone` / `ylPlainScalarNone` (see the macro above).
template advanceTypeHint(ch: char) {.dirty.} =
  typeHintStateMachine ch:
  of '.':
    [yth0, ythInt] => ythDecimal
    [ythInitial, ythMinus] => ythPoint
  of '+':
    ythNumE => ythNumEPlusMinus
  of '-':
    ythInitial => ythMinus
    ythNumE => ythNumEPlusMinus
  of '0':
    [ythInitial, ythMinus] => yth0
    [ythNumE, ythNumEPlusMinus] => ythExponent
    # a zero inside a number ("10", "1.0", "1e10") keeps the numeric state,
    # exactly like the digits 1..9 do
    [ythInt, ythDecimal, ythExponent] => nil
  of '1'..'9':
    [ythInitial, ythMinus] => ythInt
    [ythNumE, ythNumEPlusMinus] => ythExponent
    [ythInt, ythDecimal, ythExponent] => nil
  of 'a':
    ythF => ythLowerFA
    ythPointN => ythPointNA
    ythPointLowerN => ythPointLowerNA
  of 'A':
    ythF => ythFA
    ythPointN => ythPointNA
  of 'e':
    [yth0, ythInt, ythDecimal] => ythNumE
    ythLowerFALS => ythFALSE
    ythLowerTRU => ythTRUE
    ythY => ythLowerYE
  of 'E':
    [yth0, ythInt, ythDecimal] => ythNumE
    ythFALS => ythFALSE
    ythTRU => ythTRUE
    ythY => ythYE
  of 'f':
    ythInitial => ythF
    ythO => ythLowerOF
    ythLowerOF => ythOFF
    ythPointLowerIN => ythPointINF
  of 'F':
    ythInitial => ythF
    ythO => ythOF
    ythOF => ythOFF
    ythPointIN => ythPointINF
  of 'i', 'I':
    ythPoint => ythPointI
  of 'l':
    ythLowerNU => ythLowerNUL
    ythLowerNUL => ythNULL
    ythLowerFA => ythLowerFAL
  of 'L':
    ythNU => ythNUL
    ythNUL => ythNULL
    ythFA => ythFAL
  of 'n':
    ythInitial => ythN
    ythO => ythON
    ythPoint => ythPointLowerN
    ythPointI => ythPointLowerIN
    ythPointLowerNA => ythPointNAN
  of 'N':
    ythInitial => ythN
    ythO => ythON
    ythPoint => ythPointN
    ythPointI => ythPointIN
    ythPointNA => ythPointNAN
  of 'o', 'O':
    ythInitial => ythO
    ythN => ythNO
  of 'r':
    ythT => ythLowerTR
  of 'R':
    ythT => ythTR
  of 's':
    ythLowerFAL => ythLowerFALS
    ythLowerYE => ythYES
  of 'S':
    ythFAL => ythFALS
    ythYE => ythYES
  of 't', 'T':
    ythInitial => ythT
  of 'u':
    ythN => ythLowerNU
    ythLowerTR => ythLowerTRU
  of 'U':
    ythN => ythNU
    ythTR => ythTRU
  of 'y', 'Y':
    ythInitial => ythY

iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
  ## Lexes the input of `my` and yields one token kind at a time. While a
  ## token is current, `my.content` holds its text (or the error message for
  ## `tError`), `my.column` its column where the lexer recorded one, and,
  ## for `tScalarPart`, `my.typeHint` the guessed scalar type.
  ##
  ## The iterator is a state machine over single characters. A branch that
  ## executes `continue` re-examines the current character in the new state;
  ## a branch that falls through to the end of the loop consumes it.
  var
    # the following three values are used for parsing escaped unicode chars
    unicodeChar: Rune = cast[Rune](0)
    escapeLength = 0
    expectedEscapeLength = 0

    # used to temporarily store whitespace after a plain scalar
    trailingSpace = ""
    # stores chars that behave differently dependent on the following
    # char. handling will be deferred to next loop iteration.
    lastSpecialChar: char = '\0'
    # Lexer must know whether it parses block or flow style. Therefore,
    # it counts the number of open flow arrays / maps here
    flowDepth = 0
    # lexer state
    state: YamlLexerState = ylInitial
    # for giving type hints of plain scalars
    typeHintState: YamlTypeHintState = ythInitial
    # after parsing the indentation of the line, this will hold the
    # indentation length of the current line. Needed for checking where
    # a block scalar ends.
    lastIndentationLength = 0
    # when parsing a block scalar, this will be set to the indentation
    # of the line that starts the flow scalar.
    blockScalarIndentation = -1
    # column of the character currently inspected (1-based)
    curPos = 1

  while true:
    let c = my.buf[my.bufpos + my.charoffset]
    case state
    of ylInitial:
      case c
      of '%':
        state = ylDirective
        continue
      of ' ', '\t':
        state = ylInitialSpaces
        continue
      of '#':
        state = ylDirectiveComment
      else:
        state = ylInitialContent
        continue
    of ylInitialSpaces:
      case c
      of ' ', '\t':
        my.content.add(c)
      of '#':
        my.content = ""
        state = ylDirectiveComment
      of EndOfFile, '\r', '\x0A':
        state = ylDirectiveLineEnd
        continue
      else:
        state = ylIndentation
        continue
    of ylInitialContent:
      case c
      of '-':
        my.column = curPos
        state = ylDashes
        continue
      of '.':
        yieldToken(tLineStart)
        my.column = curPos
        state = ylDots
        continue
      else:
        state = ylIndentation
        continue
    of ylDashes:
      case c
      of '-':
        my.content.add(c)
      of ' ', '\t', '\r', '\x0A', EndOfFile:
        case my.content.len
        of 3:
          yieldToken(tDirectivesEnd)
          state = ylInitialInLine
        of 1:
          # a single dash: block sequence indicator, handled as deferred
          # special char by ylInitialInLine
          my.content = ""
          yieldToken(tLineStart)
          lastSpecialChar = '-'
          state = ylInitialInLine
        else:
          let tmp = my.content
          my.content = ""
          yieldToken(tLineStart)
          my.content = tmp
          my.column = curPos
          state = ylPlainScalarNone
          typeHintState = ythNone
        continue
      else:
        # dashes followed by other content start a plain scalar
        let tmp = my.content
        my.content = ""
        yieldToken(tLineStart)
        my.content = tmp
        if my.content.len == 1:
          typeHintState = ythMinus
          state = ylPlainScalar
        else:
          typeHintState = ythNone
          state = ylPlainScalarNone
        continue
    of ylDots:
      case c
      of '.':
        my.content.add(c)
      of ' ', '\t', '\r', '\x0A', EndOfFile:
        case my.content.len
        of 3:
          yieldToken(tDocumentEnd)
          state = ylDirectiveLineEnd
        else:
          state = ylPlainScalarNone
          typeHintState = ythNone
        continue
      else:
        state = ylPlainScalarNone
        typeHintState = ythNone
        continue
    of ylDirectiveLineEnd:
      case c
      of '\r':
        handleCR()
        state = ylInitial
        continue
      of '\x0A':
        handleLF()
        state = ylInitial
        continue
      of EndOfFile:
        yieldToken(tStreamEnd)
        break
      of ' ', '\t':
        discard
      of '#':
        state = ylDirectiveComment
      else:
        yieldLexerError("Unexpected content at end of directive: " & c)
    of ylLineEnd:
      case c
      of '\r':
        handleCR()
      of '\x0A':
        handleLF()
      of EndOfFile:
        yieldToken(tStreamEnd)
        break
      else:
        yieldLexerError("Internal error: Unexpected char at line end: " & c)
      state = ylInitialContent
      continue
    of ylSingleQuotedScalar:
      if lastSpecialChar != '\0':
        # ' is the only special char
        case c
        of '\'':
          # '' inside a single quoted scalar is an escaped quote
          my.content.add(c)
          lastSpecialChar = '\0'
        of EndOfFile, '\r', '\x0A':
          yieldToken(tScalar)
          lastSpecialChar = '\0'
          state = ylLineEnd
          continue
        else:
          yieldToken(tScalar)
          lastSpecialChar = '\0'
          state = ylSpaceAfterQuotedScalar
          continue
      else:
        case c
        of '\'':
          lastSpecialChar = c
        of EndOfFile:
          yieldLexerError("Unterminated single quoted string")
          yieldToken(tStreamEnd)
          break
        else:
          my.content.add(c)
    of ylDoublyQuotedScalar:
      case c
      of '"':
        yieldToken(tScalar)
        state = ylSpaceAfterQuotedScalar
      of EndOfFile:
        yieldLexerError("Unterminated doubly quoted string")
        yieldToken(tStreamEnd)
        break
      of '\\':
        state = ylEscape
        escapeLength = 0
      of '\r':
        my.content.add("\x0A")
        handleCR()
        # handleCR already moved to the first char of the next line;
        # advancing again would drop that character
        continue
      of '\x0A':
        my.content.add(c)
        handleLF()
        # see above: the position is already at the next line's first char
        continue
      else:
        my.content.add(c)
    of ylEscape:
      if escapeLength == 0:
        # first char after the backslash selects the escape kind
        expectedEscapeLength = 0
        case c
        of EndOfFile:
          yieldLexerError("Unterminated doubly quoted string")
          # do not consume the end-of-file sentinel; let ylLineEnd
          # terminate the stream
          state = ylLineEnd
          continue
        of '0': my.content.add('\0')
        of 'a': my.content.add('\x07')
        of 'b': my.content.add('\x08')
        of '\t', 't': my.content.add('\t')
        of 'n': my.content.add('\x0A')
        of 'v': my.content.add('\v')
        of 'f': my.content.add('\f')
        of 'r': my.content.add('\r')
        of 'e': my.content.add('\e')
        of ' ': my.content.add(' ')
        of '"': my.content.add('"')
        of '/': my.content.add('/')
        of '\\': my.content.add('\\')
        of 'N': my.content.add(UTF8NextLine)
        of '_': my.content.add(UTF8NonBreakingSpace)
        of 'L': my.content.add(UTF8LineSeparator)
        of 'P': my.content.add(UTF8ParagraphSeparator)
        # the expected lengths count the selector char plus the hex digits
        of 'x':
          unicodeChar = cast[Rune](0)
          expectedEscapeLength = 3
        of 'u':
          unicodeChar = cast[Rune](0)
          expectedEscapeLength = 5
        of 'U':
          unicodeChar = cast[Rune](0)
          expectedEscapeLength = 9
        else:
          yieldLexerError("Unsupported escape sequence: \\" & c)
        if expectedEscapeLength == 0:
          state = ylDoublyQuotedScalar
      else:
        # hex digit of a \x, \u or \U escape; most significant digit first
        let digitPosition = expectedEscapeLength - escapeLength - 1
        case c
        of EndOFFile:
          yieldLexerError("Unterminated escape sequence")
          state = ylLineEnd
          continue
        of '0' .. '9':
          unicodeChar = unicodechar or
              (cast[int](c) - 0x30) shl (digitPosition * 4)
        of 'A' .. 'F':
          unicodeChar = unicodechar or
              (cast[int](c) - 0x37) shl (digitPosition * 4)
        of 'a' .. 'f':
          unicodeChar = unicodechar or
              (cast[int](c) - 0x57) shl (digitPosition * 4)
        else:
          yieldLexerError(
              "unsupported char in unicode escape sequence: " & c)
          escapeLength = 0
          state = ylDoublyQuotedScalar
          continue
      inc(escapeLength)
      if escapeLength == expectedEscapeLength and escapeLength > 0:
        my.content.add(toUTF8(unicodeChar))
        state = ylDoublyQuotedScalar
    of ylSpaceAfterQuotedScalar:
      case c
      of ' ', '\t':
        trailingSpace.add(c)
      of '#':
        # a comment must be separated from the scalar by whitespace, so
        # the error applies when NO whitespace has been collected
        if trailingSpace.len == 0:
          yieldLexerError("Missing space before comment start")
        state = ylComment
        trailingSpace = ""
      else:
        trailingSpace = ""
        state = ylInitialInLine
        continue
    of ylPlainScalar:
      case c
      of EndOfFile, '\r', '\x0A':
        yieldScalarPart()
        state = ylLineEnd
        continue
      of ':':
        lastSpecialChar = c
        state = ylSpaceAfterPlainScalar
      of ' ':
        state = ylSpaceAfterPlainScalar
        continue
      of ',':
        if flowDepth > 0:
          lastSpecialChar = c
          state = ylSpaceAfterPlainScalar
        else:
          my.content.add(c)
          state = ylPlainScalarNone
          typeHintState = ythNone
      of '[', ']', '{', '}':
        yieldScalarPart()
        state = ylInitialInLine
        continue
      else:
        advanceTypeHint(c)
        my.content.add(c)
    of ylPlainScalarNone:
      case c
      of EndOfFile, '\r', '\x0A':
        yieldScalarPart()
        state = ylLineEnd
        continue
      of ':':
        lastSpecialChar = c
        state = ylSpaceAfterPlainScalar
      of ' ':
        state = ylSpaceAfterPlainScalar
        continue
      of ',':
        if flowDepth > 0:
          lastSpecialChar = c
          state = ylSpaceAfterPlainScalar
        else:
          my.content.add(c)
      of '[', ']', '{', '}':
        yieldScalarPart()
        state = ylInitialInLine
        continue
      else:
        my.content.add(c)
    of ylSpaceAfterPlainScalar:
      if lastSpecialChar != '\0':
        case c
        of ' ', '\t', EndOfFile, '\r', '\x0A':
          # the special char is an indicator: the scalar ends here and
          # ylInitialInLine emits the indicator token
          yieldScalarPart()
          # the collected whitespace belongs to the finished scalar and
          # must not leak into the next one
          trailingSpace = ""
          state = ylInitialInLine
        else:
          # the special char turned out to be part of the scalar
          my.content.add(trailingSpace)
          my.content.add(lastSpecialChar)
          lastSpecialChar = '\0'
          trailingSpace = ""
          state = ylPlainScalarNone
          typeHintState = ythNone
        continue

      case c
      of EndOfFile, '\r', '\x0A':
        trailingSpace = ""
        yieldScalarPart()
        state = ylLineEnd
        continue
      of ' ', '\t':
        trailingSpace.add(c)
      of ',':
        if flowDepth > 0:
          lastSpecialChar = c
        else:
          my.content.add(trailingSpace)
          my.content.add(c)
          trailingSpace = ""
          state = ylPlainScalarNone
          typeHintState = ythNone
      of ':', '#':
        lastSpecialChar = c
      of '[', ']', '{', '}':
        yieldScalarPart()
        trailingSpace = ""
        state = ylInitialInLine
        continue
      else:
        my.content.add(trailingSpace)
        my.content.add(c)
        trailingSpace = ""
        state = ylPlainScalarNone
        typeHintState = ythNone
    of ylInitialInLine:
      if lastSpecialChar != '\0':
        # the previous char was deferred; decide what it was based on c
        my.column = curPos - 1
        case c
        of ' ', '\t', '\r', '\x0A', EndOfFile:
          case lastSpecialChar
          of '#':
            my.content = "#"
            state = ylComment
          of ':':
            yieldToken(tColon)
          of '?':
            yieldToken(tQuestionmark)
          of '-':
            yieldToken(tDash)
          of ',':
            yieldToken(tComma)
          of '!':
            my.content = "!"
            yieldToken(tTagHandle)
            my.content = ""
            yieldToken(tTagSuffix)
          else:
            yieldLexerError("Unexpected special char: \"" &
                            lastSpecialChar & "\"")
          lastSpecialChar = '\0'
        elif lastSpecialChar == '!':
          case c
          of '<':
            state = ylVerbatimTag
            lastSpecialChar = '\0'
            # skip the '<' here because of the `continue` below
            my.bufpos += my.charlen
          else:
            state = ylTagHandle
            my.content = "!"
            lastSpecialChar = '\0'
            my.column = curPos - 1
        else:
          # the deferred char starts a plain scalar. Initialise the scalar
          # state first and feed the char to the type hint machine last, so
          # that the hint (and a possible switch to ylPlainScalarNone) is
          # not overwritten; this matches the other scalar start branches.
          my.content.add(lastSpecialChar)
          my.column = curPos - 1
          state = ylPlainScalar
          typeHintState = ythInitial
          advanceTypeHint(lastSpecialChar)
          lastSpecialChar = '\0'
        continue
      case c
      of '\r', '\x0A', EndOfFile:
        state = ylLineEnd
        continue
      of ',':
        if flowDepth > 0:
          yieldToken(tComma)
        else:
          my.content = "" & c
          my.column = curPos
          state = ylPlainScalar
          typeHintState = ythInitial
          advanceTypeHint(c)
      of '[':
        inc(flowDepth)
        yieldToken(tOpeningBracket)
      of '{':
        inc(flowDepth)
        yieldToken(tOpeningBrace)
      of ']':
        yieldToken(tClosingBracket)
        if flowDepth > 0:
          inc(flowDepth, -1)
      of '}':
        yieldToken(tClosingBrace)
        if flowDepth > 0:
          inc(flowDepth, -1)
      of '#':
        lastSpecialChar = '#'
      of '"':
        my.column = curPos
        state = ylDoublyQuotedScalar
      of '\'':
        my.column = curPos
        state = ylSingleQuotedScalar
      of '!':
        my.column = curPos
        lastSpecialChar = '!'
      of '&':
        my.column = curPos
        state = ylAnchor
      of '*':
        my.column = curPos
        state = ylAlias
      of ' ':
        discard
      of '-':
        if flowDepth == 0:
          lastSpecialChar = '-'
        else:
          my.content = "" & c
          my.column = curPos
          state = ylPlainScalar
          typeHintState = ythInitial
          advanceTypeHint(c)
      of '?', ':':
        my.column = curPos
        lastSpecialChar = c
      of '|':
        yieldToken(tPipe)
        state = ylBlockScalarHeader
      of '>':
        yieldToken(tGreater)
        state = ylBlockScalarHeader
      of '\t':
        discard
      else:
        my.content = "" & c
        my.column = curPos
        state = ylPlainScalar
        typeHintState = ythInitial
        advanceTypeHint(c)
    of ylComment, ylDirectiveComment:
      case c
      of EndOfFile, '\r', '\x0A':
        yieldToken(tComment)
        case state
        of ylComment:
          state = ylLineEnd
        of ylDirectiveComment:
          state = ylDirectiveLineEnd
        else:
          yieldLexerError("Should never happen")
        continue
      else:
        my.content.add(c)
    of ylIndentation:
      case c
      of EndOfFile, '\r', '\x0A':
        lastIndentationLength = my.content.len
        yieldToken(tLineStart)
        state = ylLineEnd
        continue
      of ' ':
        my.content.add(' ')
      else:
        lastIndentationLength = my.content.len
        yieldToken(tLineStart)
        if blockScalarIndentation != -1:
          if lastIndentationLength <= blockScalarIndentation:
            # line is not indented deeper than the block scalar's start
            # line, so the block scalar has ended
            blockScalarIndentation = -1
          else:
            state = ylBlockScalar
            continue
        state = ylInitialInLine
        continue
    of ylTagHandle:
      case c
      of '!':
        my.content.add(c)
        yieldToken(tTagHandle)
        state = ylTagSuffix
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-':
        my.content.add(c)
      of ' ', '\t', EndOfFile, '\r', '\x0A':
        # no second '!' was found: the handle is "!" and everything after
        # it is the suffix
        let suffix = my.content[1..^1]
        my.content = "!"
        yieldToken(tTagHandle)
        my.content = suffix
        yieldToken(tTagSuffix)
        state = ylInitialInLine
        continue
      else:
        yieldLexerError("Invalid character in tag handle: " & c)
        my.content = ""
        state = ylInitialInLine
    of ylTagSuffix:
      case c
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@',
         '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
        my.content.add(c)
      of ' ', '\t', EndOfFile, '\r', '\x0A':
        yieldToken(tTagSuffix)
        state = ylInitialInLine
        continue
      else:
        yieldLexerError("Invalid character in tag suffix: " & c)
        state = ylInitialInLine
    of ylVerbatimTag:
      case c
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@',
         '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
        my.content.add(c)
      of '>':
        yieldToken(tVerbatimTag)
        state = ylInitialInLine
      of EndOfFile, '\r', '\x0A':
        yieldLexerError("Unfinished verbatim tag")
        state = ylLineEnd
        continue
      else:
        yieldLexerError("Invalid character in tag URI: " & c)
        my.content = ""
        state = ylInitialInLine
    of ylDirective:
      case c
      of ' ', '\t', '\r', '\x0A', EndOfFile:
        if my.content == "%YAML":
          yieldToken(tYamlDirective)
          state = ylMajorVersion
        elif my.content == "%TAG":
          yieldToken(tTagDirective)
          state = ylDefineTagHandleInitial
        else:
          yieldToken(tUnknownDirective)
          state = ylInitialUnknown
        if c == EndOfFile:
          continue
      else:
        my.content.add(c)
    of ylInitialUnknown:
      case c
      of ' ', '\t':
        discard
      of '\r', '\x0A', EndOfFile:
        state = ylDirectiveLineEnd
        continue
      of '#':
        state = ylDirectiveComment
        continue
      else:
        state = ylUnknownDirectiveParam
        continue
    of ylUnknownDirectiveParam:
      case c
      of '\r', '\x0A', EndOfFile, ' ', '\t':
        yieldToken(tUnknownDirectiveParam)
        state = ylInitialUnknown
        continue
      else:
        my.content.add(c)
    of ylMajorVersion:
      case c
      of '0' .. '9':
        my.content.add(c)
      of '.':
        yieldToken(tVersionPart)
        state = ylMinorVersion
      of EndOfFile, '\r', '\x0A', ' ', '\t':
        yieldLexerError("Missing YAML minor version.")
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Invalid character in YAML version: " & c)
        state = ylInitialUnknown
    of ylMinorVersion:
      case c
      of '0' .. '9':
        my.content.add(c)
      of EndOfFile, '\r', '\x0A', ' ', '\t':
        yieldToken(tVersionPart)
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Invalid character in YAML version: " & c)
        state = ylInitialUnknown
    of ylDefineTagHandleInitial:
      case c
      of ' ', '\t':
        discard
      of EndOfFile, '\r', '\x0A':
        yieldLexerError("Unfinished %TAG directive")
        state = ylDirectiveLineEnd
        continue
      of '!':
        my.content.add(c)
        state = ylDefineTagHandle
      else:
        yieldLexerError("Unexpected character in %TAG directive: " & c)
        state = ylInitialInLine
    of ylDefineTagHandle:
      case c
      of '!':
        my.content.add(c)
        yieldToken(tTagHandle)
        state = ylDefineTagURIInitial
      of 'a' .. 'z', 'A' .. 'Z', '-':
        my.content.add(c)
      of EndOfFile, '\r', '\x0A':
        yieldLexerError("Unfinished %TAG directive")
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Unexpected char in %TAG directive: " & c)
        state = ylInitialInLine
    of ylDefineTagURIInitial:
      case c
      of '\t', ' ':
        # content is (ab)used to remember that whitespace has been seen
        my.content.add(c)
      of '\x0A', '\r', EndOfFile:
        yieldLexerError("Unfinished %TAG directive")
        state = ylDirectiveLineEnd
        continue
      else:
        if my.content.len == 0:
          yieldLexerError("Missing whitespace in %TAG directive")
        my.content = ""
        state = ylDefineTagURI
        continue
    of ylDefineTagURI:
      case c
      of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':', '@',
         '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
        my.content.add(c)
      of '\x0A', '\r', EndOfFile, ' ', '\t':
        yieldToken(tTagURI)
        state = ylDirectiveLineEnd
        continue
      else:
        yieldLexerError("Invalid URI character: " & c)
        state = ylInitialInLine
        continue
    of ylBlockScalarHeader:
      case c
      of '0' .. '9':
        my.content = "" & c
        yieldToken(tBlockIndentationIndicator)
      of '+':
        yieldToken(tPlus)
      of '-':
        yieldToken(tDash)
      of '\r', '\x0A', EndOfFile:
        blockScalarIndentation = lastIndentationLength
        state = ylLineEnd
        continue
      else:
        yieldLexerError("Unexpected character in block scalar header: " & c)
    of ylBlockScalar:
      case c
      of EndOfFile, '\r', '\x0A':
        yieldScalarPart()
        state = ylLineEnd
        continue
      else:
        my.content.add(c)
    of ylAnchor:
      case c
      of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
        yieldToken(tAnchor)
        state = ylInitialInLine
        continue
      else:
        my.content.add(c)
    of ylAlias:
      if lastSpecialChar != '\0':
        case c
        of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
          # the deferred ':' is an indicator; it stays in lastSpecialChar
          # and is emitted by ylInitialInLine
          yieldToken(tAlias)
          state = ylInitialInLine
          continue
        else:
          # the deferred ':' is part of the alias name
          my.content.add(lastSpecialChar)
          lastSpecialChar = '\0'
      case c
      of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
        yieldToken(tAlias)
        state = ylInitialInLine
        continue
      of ':':
        lastSpecialChar = ':'
      of ',':
        if flowDepth > 0:
          yieldToken(tAlias)
          state = ylInitialInLine
          continue
        my.content.add(c)
      else:
        my.content.add(c)

    # consume the current character
    my.bufpos += my.charlen
    curPos.inc