# NimYAML/yaml/private/lex.nim
#
#            NimYAML - YAML implementation in Nim
#        (c) Copyright 2015-2023 Felix Krause
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.

import lexbase, streams, strutils, unicode
import ../data

# The terminal module is only needed (and re-exported) for the colored
# output produced by the `debug` template in yamlDebug builds.
when defined(yamlDebug):
  import terminal
  export terminal
2016-09-10 10:30:40 +02:00
type
  Lexer* = object
    ## State of the YAML tokenizer. `next` advances it by one token; the
    ## result is published through the exported fields.
    cur*: Token                     ## kind of the most recently lexed token
    curStartPos*, curEndPos*: Mark  ## set by `startToken` / `endToken`
    flowDepth*: int                 ## compared against 0 to decide whether
                                    ## flow indicators end a plain scalar
    # recently read scalar or URI, if any
    evaluated*: string
    # internals
    indentation: int       # value reported by `recentIndentation`
    source: BaseLexer      # buffered character source
    tokenStart: int        # buffer position recorded by `startToken`
    # `state` is the proc run by `next`; `lineStartState` is entered by
    # `endLine` after a line break.
    # NOTE(review): the role of `jsonEnablingState` is not visible in this
    # part of the file - confirm against its users.
    state, lineStartState, jsonEnablingState: State
    c: char                # character most recently loaded by `advance`
    seenMultiline: bool    # reported by `lastScalarWasMultiline`
    # indentation of recently started set of node properties.
    # necessary for implicit keys with properties.
    propertyIndentation: int

  LexerError* = object of ValueError
    ## Raised for malformed input; carries the position and the text of the
    ## offending line (filled in by `generateError`).
    line*, column*: Positive
    lineContent*: string

  # A lexer state. `next` keeps calling the current state until it returns
  # true.
  State = proc(lex: var Lexer): bool {.gcSafe, nimcall, raises: [LexerError].}

  Token* {.pure.} = enum
    YamlDirective,    # `%YAML`
    TagDirective,     # `%TAG`
    UnknownDirective, # any directive but `%YAML` and `%TAG`
    DirectiveParam,   # parameters of %YAML and unknown directives
    EmptyLine,        # for line folding in multiline plain scalars
    DirectivesEnd,    # explicit `---`
    DocumentEnd,      # explicit `...`
    StreamEnd,        # end of input
    Indentation,      # beginning of non-empty line
    Plain, SingleQuoted, DoubleQuoted, Literal, Folded,
    SeqItemInd,       # block sequence item indicator `- `
    MapKeyInd,        # block mapping key indicator `? `
    MapValueInd,      # block mapping value indicator `: `
    MapStart, MapEnd, SeqStart, SeqEnd, SeqSep, # {}[],
    TagHandle,        # a handle of a tag, e.g. `!!` of `!!str`
    Suffix,           # suffix of a tag shorthand, e.g. `str` of `!!str`.
                      # also used for the URI of the %TAG directive
    VerbatimTag,      # a verbatim tag, e.g. `!<tag:yaml.org,2002:str>`
    Anchor,           # anchor property of a node, e.g. `&anchor`
    Alias             # alias node, e.g. `*alias`

  ChompType* = enum
    ## Chomping mode of a block scalar header: `+` selects `ctKeep`,
    ## `-` selects `ctStrip`, no indicator leaves `ctClip`.
    ctKeep, ctClip, ctStrip

  # Classification of the beginning of a line, as returned by `startLine`.
  LineStartType = enum
    lsDirectivesEndMarker, lsDocumentEndMarker, lsComment,
    lsNewline, lsStreamEnd, lsContent
2016-09-11 11:28:05 +02:00
# consts
2016-09-10 10:30:40 +02:00
const
  # character classes used by the lexer states
  space            = {' ', '\t'}
  lineEnd          = {'\l', '\c', EndOfFile}
  spaceOrLineEnd   = space + lineEnd
  commentOrLineEnd = lineEnd + {'#'}
  digits           = {'0'..'9'}
  flowIndicators   = {'[', ']', '{', '}', ','}
  uriChars         = {'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':',
      '@', '&', '-', '=', '+', '$', '_', '.', '~', '*', '\'', '(', ')'}
  tagShorthandChars = {'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-'}

  # token kind groups exported for users of the lexer
  nodePropertyKind* = {Token.TagHandle, Token.VerbatimTag, Token.Anchor}
  scalarTokenKind* = {Token.Plain, Token.SingleQuoted, Token.DoubleQuoted,
                      Token.Literal, Token.Folded}

  # UTF-8 encodings emitted for the double-quoted escapes
  # `\N`, `\_`, `\L` and `\P`
  UTF8NextLine           = toUTF8(0x85.Rune)
  UTF8NonBreakingSpace   = toUTF8(0xA0.Rune)
  UTF8LineSeparator      = toUTF8(0x2028.Rune)
  UTF8ParagraphSeparator = toUTF8(0x2029.Rune)

  UnknownIndentation* = int.low
2016-09-10 10:30:40 +02:00
proc currentIndentation*(lex: Lexer): int =
  ## Zero-based column of the lexer's current character, i.e. the column
  ## number of the buffer position minus one.
  return lex.source.getColNumber(lex.source.bufpos) - 1
proc recentIndentation*(lex: Lexer): int =
  ## Returns the indentation value most recently stored in the lexer.
  return lex.indentation
2020-10-29 23:34:28 +01:00
# lexer source handling
2016-09-10 10:30:40 +02:00
2020-11-03 21:17:31 +01:00
proc advance(lex: var Lexer, step: int = 1) {.inline.} =
  ## Loads the character at the current buffer position into `lex.c` and
  ## then moves the buffer position forward by `step`. With the default
  ## step, `lex.c` is afterwards the character just before
  ## `lex.source.bufpos`.
  lex.c = lex.source.buf[lex.source.bufpos]
  lex.source.bufpos.inc(step)
2016-09-10 10:30:40 +02:00
2020-11-03 21:17:31 +01:00
template lexCR(lex: var Lexer) =
  ## Processes a carriage return held in `lex.c` via `BaseLexer.handleCR`
  ## and loads the first character of the following line. Errors raised by
  ## the underlying stream are re-raised as `LexerError` with the original
  ## exception attached as parent.
  try: lex.source.bufpos = lex.source.handleCR(lex.source.bufpos - 1)
  except CatchableError as ce:
    var e = lex.generateError("Encountered stream error: " & ce.msg)
    e.parent = ce
    raise e
  lex.advance()
2016-09-10 12:38:03 +02:00
2020-11-03 21:17:31 +01:00
template lexLF(lex: var Lexer) =
  ## Processes a line feed held in `lex.c` via `BaseLexer.handleLF` and
  ## loads the first character of the following line. Errors raised by the
  ## underlying stream are re-raised as `LexerError` with the original
  ## exception attached as parent.
  try: lex.source.bufpos = lex.source.handleLF(lex.source.bufpos - 1)
  except CatchableError as ce:
    var e = lex.generateError("Encountered stream error: " & ce.msg)
    e.parent = ce
    raise e
  lex.advance()
2016-09-10 12:38:03 +02:00
2020-11-03 21:17:31 +01:00
template lineNumber(lex: Lexer): Positive =
  ## Line number as tracked by the underlying `BaseLexer`.
  lex.source.lineNumber
2016-09-10 12:38:03 +02:00
2020-11-03 21:17:31 +01:00
template columnNumber(lex: Lexer): Positive =
  ## Column number of the current buffer position, as computed by
  ## `BaseLexer.getColNumber`.
  lex.source.getColNumber(lex.source.bufpos)
2016-09-11 11:28:05 +02:00
2020-11-03 21:17:31 +01:00
template currentLine(lex: Lexer): string =
  ## Text of the current line as returned by `BaseLexer.getCurrentLine`
  ## with its marker argument enabled; used for error messages.
  lex.source.getCurrentLine(true)
2016-09-11 11:28:05 +02:00
2020-11-03 21:17:31 +01:00
proc isPlainSafe(lex: Lexer): bool {.inline.} =
  ## Inspects the character at the buffer position (the one following
  ## `lex.c`) and tells whether it may continue a plain scalar: whitespace
  ## and line ends never do, flow indicators only while `flowDepth` is 0,
  ## everything else does.
  case lex.source.buf[lex.source.bufpos]
  of spaceOrLineEnd: result = false
  of flowIndicators: result = lex.flowDepth == 0
  else: result = true
# lexer states
{.push gcSafe.}
# `raises` cannot be pushed.
# Forward declarations of all lexer states so that they can refer to each
# other and be assigned to `Lexer.state` before their definition.
proc outsideDoc(lex: var Lexer): bool {.raises: [].}
proc yamlVersion(lex: var Lexer): bool {.raises: [LexerError].}
proc tagShorthand(lex: var Lexer): bool {.raises: [LexerError].}
proc tagUri(lex: var Lexer): bool {.raises: [LexerError].}
proc unknownDirParams(lex: var Lexer): bool {.raises: [].}
proc expectLineEnd(lex: var Lexer): bool {.raises: [LexerError].}
proc lineStart(lex: var Lexer): bool {.raises: [LexerError].}
proc flowLineStart(lex: var Lexer): bool {.raises: [LexerError].}
proc flowLineIndentation(lex: var Lexer): bool {.raises: [LexerError].}
proc insideLine(lex: var Lexer): bool {.raises: [LexerError].}
proc indentationSettingToken(lex: var Lexer): bool {.raises: [LexerError].}
proc afterToken(lex: var Lexer): bool {.raises: [LexerError].}
proc beforeIndentationSettingToken(lex: var Lexer): bool {.raises: [LexerError].}
proc afterJsonEnablingToken(lex: var Lexer): bool {.raises: [LexerError].}
proc lineIndentation(lex: var Lexer): bool {.raises: [].}
proc lineDirEnd(lex: var Lexer): bool {.raises: [].}
proc lineDocEnd(lex: var Lexer): bool {.raises: [].}
proc atSuffix(lex: var Lexer): bool {.raises: [LexerError].}
proc streamEnd(lex: var Lexer): bool {.raises: [].}
{.pop.}
2016-09-11 11:28:05 +02:00
2020-10-29 23:34:28 +01:00
# helpers
2016-09-11 11:28:05 +02:00
2020-11-14 10:58:52 -08:00
template debug*(message: string) =
  ## Emits `message` as diagnostic output when compiled with
  ## `-d:yamlDebug`; expands to nothing otherwise. At compile time (in the
  ## VM) the message is echoed, at run time it is written to stdout in
  ## blue. Output errors are deliberately ignored.
  when defined(yamlDebug):
    when nimvm:
      echo "yamlDebug: ", message
    else:
      try: styledWriteLine(stdout, fgBlue, message)
      except ValueError, IOError: discard
2016-09-11 11:28:05 +02:00
2020-11-03 21:17:31 +01:00
proc generateError(lex: Lexer, message: string):
    ref LexerError {.raises: [].} =
  ## Builds (but does not raise) a `LexerError` carrying `message` together
  ## with the lexer's current line number, column number and line content.
  result = (ref LexerError)(
      msg: message, line: lex.lineNumber(), column: lex.columnNumber(),
      lineContent: lex.currentLine())
2016-09-10 10:30:40 +02:00
2020-11-03 21:17:31 +01:00
proc startToken(lex: var Lexer) {.inline.} =
  ## Records the current position as the start of the token being lexed,
  ## both as line/column mark and as buffer offset (used by `shortLexeme`
  ## and `fullLexeme`).
  lex.curStartPos = Mark(line: lex.lineNumber(), column: lex.columnNumber())
  lex.tokenStart = lex.source.bufpos
2016-09-10 10:30:40 +02:00
2020-11-03 21:17:31 +01:00
proc endToken(lex: var Lexer) {.inline.} =
  ## Records the current line/column as the end position of the token
  ## being lexed.
  lex.curEndPos = Mark(line: lex.lineNumber(), column: lex.columnNumber())
2016-09-10 10:30:40 +02:00
2020-11-03 21:17:31 +01:00
proc readNumericSubtoken(lex: var Lexer) {.inline.} =
  ## Consumes a non-empty run of decimal digits starting at `lex.c` and
  ## leaves `lex.c` on the first character after it.
  ## Raises `LexerError` if `lex.c` is not a digit on entry.
  if lex.c notin digits:
    raise lex.generateError("Illegal character in YAML version string: " & escape("" & lex.c))
  while true:
    lex.advance()
    if lex.c notin digits: break
2020-11-03 21:17:31 +01:00
proc consumeMarkerIfPresent(lex: var Lexer, marker: char): bool =
  ## Shared implementation of `isDirectivesEnd` and `isDocumentEnd`.
  ## Checks whether the two characters following `lex.c` both equal
  ## `marker` and are followed by whitespace, a line end or the end of
  ## input. If so, the marker is consumed, `lex.c` is left on the character
  ## following it and true is returned; otherwise the lexer is untouched.
  ## `lex.c` itself is not inspected: callers only invoke this when it
  ## already holds `marker`.
  let pos = lex.source.bufpos
  # `and` short-circuits, so later positions are only read once the
  # earlier ones matched.
  if lex.source.buf[pos] == marker and lex.source.buf[pos + 1] == marker and
      lex.source.buf[pos + 2] in spaceOrLineEnd:
    lex.source.bufpos = pos + 2
    lex.advance()
    return true
  return false

proc isDirectivesEnd(lex: var Lexer): bool =
  ## With `lex.c` on a `-`, tells whether a directives end marker `---`
  ## starts here and consumes it if so.
  result = lex.consumeMarkerIfPresent('-')

proc isDocumentEnd(lex: var Lexer): bool =
  ## With `lex.c` on a `.`, tells whether a document end marker `...`
  ## starts here and consumes it if so.
  result = lex.consumeMarkerIfPresent('.')
2020-11-03 21:17:31 +01:00
proc readHexSequence(lex: var Lexer, len: int) =
  ## Reads `len` hexadecimal digits following `lex.c`, interprets them as a
  ## code point (most significant digit first) and appends its UTF-8
  ## encoding to `lex.evaluated`. Afterwards `lex.c` is the last digit.
  ##
  ## Raises `LexerError` if the sequence is cut short by a line end or the
  ## end of input, contains a non-hex character, or denotes a value above
  ## U+10FFFF. The last check keeps an eight-digit escape such as
  ## `\UFFFFFFFF` from overflowing the `Rune` conversion.
  var charPos = 0
  for i in countup(0, len-1):
    lex.advance()
    let digitPosition = len - i - 1
    case lex.c
    of lineEnd:
      raise lex.generateError("Unfinished unicode escape sequence")
    of '0'..'9':
      charPos = charPos or (int(lex.c) - 0x30) shl (digitPosition * 4)
    of 'A' .. 'F':
      charPos = charPos or (int(lex.c) - 0x37) shl (digitPosition * 4)
    of 'a' .. 'f':
      charPos = charPos or (int(lex.c) - 0x57) shl (digitPosition * 4)
    else:
      raise lex.generateError("Invalid character in hex escape sequence: " &
          escape("" & lex.c))
  if charPos < 0 or charPos > 0x10FFFF:
    raise lex.generateError(
        "Escape sequence is not a valid unicode code point: 0x" &
        toHex(charPos, max(len, 1)))
  lex.evaluated.add(toUTF8(Rune(charPos)))
2020-10-29 23:34:28 +01:00
proc readURI(lex: var Lexer, verbatim: bool) =
  ## Reads a URI / tag suffix into `lex.evaluated`, decoding `%XX` escapes.
  ##
  ## If `lex.c` is `<` on entry, the URI is delimited by `<` ... `>`;
  ## otherwise it starts at `lex.c` and ends at whitespace or a line end.
  ## Inside flow content (`flowDepth > 0`) and unless `verbatim` is set,
  ## the characters `[`, `]` and `,` also end the URI and `!` is rejected.
  ## Raises `LexerError` on illegal characters or an unclosed `<...>`.
  lex.evaluated.setLen(0)
  let endWithSpace = lex.c != '<'
  let restricted = lex.flowDepth > 0 and not verbatim
  # start (buffer offset) of the pending run of literal characters that has
  # not been copied to `evaluated` yet
  var literalStart: int
  if endWithSpace:
    if not restricted and lex.c in {'[', ']', ','}:
      raise lex.generateError("Flow indicator cannot start tag prefix")
    literalStart = lex.source.bufpos - 1
  else:
    literalStart = lex.source.bufpos
    lex.advance()
  while true:
    case lex.c
    of spaceOrLineEnd:
      if endWithSpace:
        lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
        break
      raise lex.generateError("Unclosed verbatim tag")
    of '%':
      # flush what was read so far, then decode the escaped character
      lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
      lex.readHexSequence(2)
      literalStart = lex.source.bufpos
    of uriChars: discard
    of '[', ']', ',':
      if restricted:
        lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
        break
    of '!':
      if restricted:
        raise lex.generateError("Illegal '!' in tag suffix")
    of '>':
      if endWithSpace:
        raise lex.generateError("Illegal character in URI: `>`")
      lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
      lex.advance()
      break
    else:
      raise lex.generateError("Illegal character in URI: " & escape("" & lex.c))
    lex.advance()
2020-11-03 21:17:31 +01:00
proc endLine(lex: var Lexer) =
  ## Finishes the current line. A comment starting at `lex.c` is skipped,
  ## then the line break is consumed and the lexer switches to
  ## `lineStartState`, or to `streamEnd` if the input ends here.
  ##
  ## Must be called with `lex.c` on `#`, a line break or the end of input;
  ## any other character is left in place and the loop does not terminate.
  while true:
    case lex.c
    of '\l':
      lex.lexLF()
      lex.state = lex.lineStartState
      break
    of '\c':
      lex.lexCR()
      lex.state = lex.lineStartState
      break
    of EndOfFile:
      lex.state = streamEnd
      break
    of '#':
      # skip the comment; the next iteration handles the line end
      while true:
        lex.advance()
        if lex.c in lineEnd: break
    else: discard
2020-11-03 21:17:31 +01:00
proc startLine(lex: var Lexer): LineStartType =
  ## Classifies the line beginning at `lex.c`.
  ##
  ## Document markers are consumed when recognized. Otherwise leading
  ## spaces are skipped; if a tab follows and the rest of the line is only
  ## whitespace up to a comment or line end, that whitespace is skipped as
  ## well, else the line counts as content with `lex.c` left on the tab.
  case lex.c
  of '-':
    result = if lex.isDirectivesEnd(): lsDirectivesEndMarker else: lsContent
  of '.':
    result = if lex.isDocumentEnd(): lsDocumentEndMarker else: lsContent
  else:
    while lex.c == ' ': lex.advance()
    if lex.c == '\t':
      # look ahead without consuming: tabs are only skipped when nothing
      # but a comment or the line end follows
      var peek = lex.source.bufpos
      while lex.source.buf[peek] in space:
        peek += 1
      if lex.source.buf[peek] in commentOrLineEnd:
        lex.source.bufpos = peek + 1
        lex.c = lex.source.buf[peek]
      else:
        return lsContent
    result =
      case lex.c
      of '#': lsComment
      of '\l', '\c': lsNewline
      of EndOfFile: lsStreamEnd
      else: lsContent
2020-11-03 21:17:31 +01:00
proc readPlainScalar(lex: var Lexer) =
  ## Reads a (possibly multi-line) plain scalar starting at `lex.c` into
  ## `lex.evaluated` and sets `lex.cur` to `Token.Plain`.
  ##
  ## Single line breaks between content lines are folded into a space,
  ## n > 1 consecutive breaks into n - 1 line feeds. `seenMultiline` is set
  ## once content from a second line has been accepted. On return,
  ## `lex.state` is the state that must process whatever ended the scalar.
  lex.evaluated.setLen(0)
  let afterNewlineState = if lex.flowDepth == 0: lineIndentation
                          else: flowLineIndentation
  var lineStartPos: int
  lex.seenMultiline = false
  lex.startToken()
  if lex.propertyIndentation != -1:
    lex.indentation = lex.propertyIndentation
    lex.propertyIndentation = -1
  lex.cur = Token.Plain
  block multilineLoop:
    while true:
      # --- content of one line -------------------------------------------
      lineStartPos = lex.source.bufpos - 1
      block inlineLoop:
        while true:
          lex.advance()
          case lex.c
          of space:
            # Whitespace may be trailing (then it is dropped) or interior
            # (then it is kept); remember where it started and decide once
            # the next non-space character is known.
            lex.endToken()
            let spaceStart = lex.source.bufpos - 2
            block spaceLoop:
              while true:
                lex.advance()
                case lex.c
                of '\l', '\c':
                  lex.evaluated.add(lex.source.buf[lineStartPos..spaceStart])
                  break inlineLoop
                of EndOfFile:
                  lex.evaluated.add(lex.source.buf[lineStartPos..spaceStart])
                  lex.state = streamEnd
                  break multilineLoop
                of '#':
                  lex.evaluated.add(lex.source.buf[lineStartPos..spaceStart])
                  lex.state = expectLineEnd
                  break multilineLoop
                of ':':
                  if not lex.isPlainSafe():
                    lex.evaluated.add(lex.source.buf[lineStartPos..spaceStart])
                    lex.state = insideLine
                    break multilineLoop
                  break spaceLoop
                of flowIndicators:
                  if lex.flowDepth > 0:
                    lex.evaluated.add(lex.source.buf[lineStartPos..spaceStart])
                    lex.state = insideLine
                    break multilineLoop
                  break spaceLoop
                of space: discard
                else: break spaceLoop
          of ':':
            if not lex.isPlainSafe():
              lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2])
              lex.endToken()
              lex.state = insideLine
              break multilineLoop
          of flowIndicators:
            if lex.flowDepth > 0:
              lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2])
              lex.endToken()
              lex.state = insideLine
              break multilineLoop
          of '\l', '\c':
            lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2])
            lex.endToken()
            break inlineLoop
          of EndOfFile:
            lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2])
            if lex.currentIndentation() > 0:
              lex.endToken()
            lex.state = streamEnd
            break multilineLoop
          else: discard
      # --- line break(s) and start of the next line -----------------------
      lex.endLine()
      var newlines = 1
      block newlineLoop:
        while true:
          case lex.startLine()
          of lsContent:
            if lex.currentIndentation() <= lex.indentation:
              # not indented enough to continue the scalar
              lex.state = afterNewlineState
              break multilineLoop
            if lex.c == '\t':
              while lex.c in space: lex.advance()
              case lex.c
              of '#':
                lex.endLine()
                lex.state = lineStart
                break multilineLoop
              of '\l', '\c':
                lex.endLine()
                newlines += 1
                continue
              else: discard
            break newlineLoop
          of lsDirectivesEndMarker:
            lex.state = lineDirEnd
            break multilineLoop
          of lsDocumentEndMarker:
            lex.state = lineDocEnd
            break multilineLoop
          of lsStreamEnd:
            break multilineLoop
          of lsComment:
            lex.endLine()
            lex.state = lineStart
            break multilineLoop
          of lsNewline: lex.endLine()
          newlines += 1
      while lex.c in space: lex.advance()
      # the continuation line must not start with something that cannot be
      # part of a plain scalar
      if (lex.c == ':' and not lex.isPlainSafe()) or
          lex.c == '#' or (lex.c in flowIndicators and
          lex.flowDepth > 0):
        lex.state = afterNewlineState
        break multilineLoop
      # --- line folding -----------------------------------------------------
      lex.seenMultiline = true
      if newlines == 1: lex.evaluated.add(' ')
      else:
        for i in countup(2, newlines): lex.evaluated.add('\l')
2016-09-10 10:30:40 +02:00
2020-11-03 21:17:31 +01:00
proc streamEndAfterBlock(lex: var Lexer) =
  ## Called when the input ends inside a block scalar. Unless the lexer is
  ## at the very start of a line, the token end is set to the current
  ## position, moved back by one column.
  if lex.currentIndentation() != 0:
    lex.endToken()
    lex.curEndPos.column -= 1
2016-09-10 10:30:40 +02:00
proc dirEndFollows(lex: Lexer): bool =
  ## Tells, without consuming anything, whether a directives end marker
  ## begins at `lex.c`: three `-` followed by whitespace, a line end or the
  ## end of input. The trailing check mirrors `isDirectivesEnd`, so that
  ## text such as `---word` is not mistaken for a marker.
  return lex.c == '-' and lex.source.buf[lex.source.bufpos] == '-' and
      lex.source.buf[lex.source.bufpos+1] == '-' and
      lex.source.buf[lex.source.bufpos+2] in spaceOrLineEnd
proc docEndFollows(lex: Lexer): bool =
  ## Tells, without consuming anything, whether a document end marker
  ## begins at `lex.c`: three `.` followed by whitespace, a line end or the
  ## end of input. The trailing check mirrors `isDocumentEnd`, so that
  ## text such as `...word` is not mistaken for a marker.
  return lex.c == '.' and lex.source.buf[lex.source.bufpos] == '.' and
      lex.source.buf[lex.source.bufpos+1] == '.' and
      lex.source.buf[lex.source.bufpos+2] in spaceOrLineEnd
2016-09-10 10:30:40 +02:00
2020-11-03 21:17:31 +01:00
proc readBlockScalar(lex: var Lexer) =
  ## Reads a literal (`|`) or folded (`>`) block scalar whose indicator is
  ## in `lex.c`. The content goes to `lex.evaluated`; `lex.cur` becomes
  ## `Token.Literal` or `Token.Folded`.
  ##
  ## The header may contain one chomping indicator (`+` / `-`) and one
  ## indentation indicator (`1`..`9`), optionally followed by a comment.
  ## Raises `LexerError` for malformed headers and inconsistently indented
  ## content.
  var
    chomp = ctClip
    indent = 0             # content indentation; 0 = not yet determined
    separationLines = 0    # pending line breaks not yet written out
    contentStart: int
    hasBody = true         # false if the input ends right in the header
  lex.startToken()
  lex.cur = if lex.c == '>': Token.Folded else: Token.Literal
  lex.evaluated.setLen(0)

  # header
  while true:
    lex.advance()
    case lex.c
    of '+':
      if chomp != ctClip:
        raise lex.generateError("Multiple chomping indicators")
      chomp = ctKeep
    of '-':
      if chomp != ctClip:
        raise lex.generateError("Multiple chomping indicators")
      chomp = ctStrip
    of '1' .. '9':
      if indent != 0:
        raise lex.generateError("Multiple indentation indicators")
      # the indicator is relative to the indentation of the parent node
      indent = max(0, lex.indentation) + int(lex.c) - int('0')
    of ' ':
      while true:
        lex.advance()
        if lex.c != ' ': break
      if lex.c notin commentOrLineEnd:
        raise lex.generateError("Illegal character after block scalar header: " &
          escape("" & lex.c))
      break
    of EndOfFile:
      hasBody = false
      break
    of '\l', '\c': break
    else:
      raise lex.generateError("Illegal character in block scalar header: " &
          escape("" & lex.c))
  lex.endLine()

  block body:
    # determining indentation and leading empty lines
    var
      maxLeadingSpaces = 0
      moreIndented = false
    while true:
      if indent == 0:
        while lex.c == ' ': lex.advance()
      else:
        maxLeadingSpaces = lex.currentIndentation() + indent
        while lex.c == ' ' and lex.currentIndentation() < maxLeadingSpaces:
          lex.advance()
      case lex.c
      of '\l', '\c':
        lex.endToken()
        maxLeadingSpaces = max(maxLeadingSpaces, lex.currentIndentation())
        lex.endLine()
        separationLines += 1
      of EndOfFile:
        lex.state = streamEnd
        lex.streamEndAfterBlock()
        if lex.source.getColNumber(lex.source.bufpos) > 1 and hasBody: separationLines += 1
        break body
      else:
        if indent == 0:
          # auto-detect the indentation from the first non-empty line
          indent = lex.currentIndentation()
          if indent <= lex.indentation or
              (indent == 0 and (lex.dirEndFollows() or lex.docEndFollows())):
            lex.state = lineIndentation
            break body
          elif indent < maxLeadingSpaces:
            raise lex.generateError("Leading all-spaces line contains too many spaces")
        elif lex.currentIndentation() < indent: break body
        if lex.cur == Token.Folded and lex.c in space:
          moreIndented = true
        break
    for i in countup(0, separationLines - 1):
      lex.evaluated.add('\l')
    separationLines = if moreIndented: 1 else: 0

    block content:
      while true:
        contentStart = lex.source.bufpos - 1
        while lex.c notin lineEnd: lex.advance()
        lex.evaluated.add(lex.source.buf[contentStart .. lex.source.bufpos - 2])
        if lex.c == EndOfFile:
          lex.state = streamEnd
          lex.streamEndAfterBlock()
          break body
        separationLines += 1
        lex.endToken()
        lex.endLine()
        let oldMoreIndented = moreIndented

        # empty lines and indentation of next line
        moreIndented = false
        while true:
          while lex.c == ' ' and lex.currentIndentation() < indent:
            lex.advance()
          case lex.c
          of '\l', '\c':
            lex.endToken()
            separationLines += 1
            lex.endLine()
          of EndOfFile:
            lex.state = streamEnd
            lex.streamEndAfterBlock()
            break body
          else:
            if lex.currentIndentation() < indent or
                (indent == 0 and (lex.dirEndFollows() or lex.docEndFollows())):
              break content
            if lex.cur == Token.Folded and lex.c in space:
              moreIndented = true
              # a switch from normal to more-indented text keeps its break
              if not oldMoreIndented:
                separationLines += 1
            break

        # line folding
        if lex.cur == Token.Literal:
          for i in countup(0, separationLines - 1):
            lex.evaluated.add('\l')
        elif separationLines == 1:
          lex.evaluated.add(' ')
        else:
          for i in countup(0, separationLines - 2):
            lex.evaluated.add('\l')
        separationLines = if moreIndented: 1 else: 0

    # content ended at a less indented line; decide how to continue
    let markerFollows = lex.currentIndentation() == 0 and
        (lex.dirEndFollows() or lex.docEndFollows())
    if lex.c == '#':
      lex.state = expectLineEnd
    elif lex.currentIndentation() > lex.indentation and not markerFollows:
      raise lex.generateError("This line #" & $lex.curStartPos.line & " at " & escape("" & lex.c) & " is less indented than necessary")
    elif lex.currentIndentation() == 0:
      lex.state = lineStart
    else:
      lex.state = lineIndentation

  # NOTE(review): the nesting level of this call could not be recovered
  # with certainty; placed here it also runs after the end-of-input paths
  # that already called `streamEndAfterBlock` - confirm against upstream.
  lex.endToken()

  case chomp
  of ctStrip: discard
  of ctClip:
    if len(lex.evaluated) > 0: lex.evaluated.add('\l')
  of ctKeep:
    for i in countup(0, separationLines - 1):
      lex.evaluated.add('\l')
2020-10-29 23:34:28 +01:00
2020-11-03 21:17:31 +01:00
proc processQuotedWhitespace(lex: var Lexer, initial: int) =
  ## Handles a run of whitespace inside a quoted scalar, starting at
  ## `lex.c`.
  ##
  ## If the run stays on one line it is appended to `lex.evaluated`
  ## verbatim. If it contains line breaks, it is folded: the number of
  ## breaks is counted starting from `initial`; a count of 0 adds nothing,
  ## 1 adds a space, n > 1 adds n - 1 line feeds. Raises `LexerError` if a
  ## document marker or the end of input is met while folding.
  var newlines = initial
  let firstSpace = lex.source.bufpos - 1
  while true:
    case lex.c
    of ' ', '\t': discard
    of '\l':
      lex.lexLF()
      break
    of '\c':
      lex.lexCR()
      break
    else:
      # no line break involved: keep the whitespace as it is
      lex.evaluated.add(lex.source.buf[firstSpace..lex.source.bufpos - 2])
      return
    lex.advance()
  lex.seenMultiline = true
  while true:
    case lex.startLine()
    of lsContent, lsComment:
      while lex.c in space: lex.advance()
      if lex.c in {'\l', '\c'}:
        lex.endLine()
      else: break
    of lsDirectivesEndMarker:
      raise lex.generateError("Illegal `---` within quoted scalar")
    of lsDocumentEndMarker:
      raise lex.generateError("Illegal `...` within quoted scalar")
    of lsNewline: lex.endLine()
    of lsStreamEnd:
      raise lex.generateError("Unclosed quoted string")
    newlines += 1
  if newlines == 0: discard
  elif newlines == 1: lex.evaluated.add(' ')
  else:
    for i in countup(2, newlines): lex.evaluated.add('\l')
2020-10-29 23:34:28 +01:00
2020-11-03 21:17:31 +01:00
proc readSingleQuotedScalar(lex: var Lexer) =
  ## Reads a single-quoted scalar whose opening quote is in `lex.c` into
  ## `lex.evaluated` and sets `lex.cur` to `Token.SingleQuoted`.
  ## `''` yields one quote character; whitespace is processed by
  ## `processQuotedWhitespace`. Raises `LexerError` if the input ends
  ## before the closing quote.
  lex.seenMultiline = false
  lex.startToken()
  lex.evaluated.setLen(0)
  if lex.propertyIndentation != -1:
    lex.indentation = lex.propertyIndentation
    lex.propertyIndentation = -1
  # start of the run of literal characters not yet copied to `evaluated`
  var literalStart = lex.source.bufpos
  lex.advance()
  while true:
    case lex.c
    of EndOfFile:
      raise lex.generateError("Unclosed quoted string")
    of '\'':
      lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
      lex.advance()
      if lex.c == '\'':
        # doubled quote: an escaped single quote
        lex.evaluated.add('\'')
        literalStart = lex.source.bufpos
        lex.advance()
      else: break
    of ' ', '\t', '\l', '\c':
      lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
      lex.processQuotedWhitespace(1)
      literalStart = lex.source.bufpos - 1
    else:
      lex.advance()
  lex.endToken()
  lex.cur = Token.SingleQuoted
2020-10-29 23:34:28 +01:00
2020-11-03 21:17:31 +01:00
proc readDoubleQuotedScalar(lex: var Lexer) =
  ## Reads a double-quoted scalar whose opening quote is in `lex.c` into
  ## `lex.evaluated`, resolving backslash escapes, and sets `lex.cur` to
  ## `Token.DoubleQuoted`. Raises `LexerError` for unknown escapes and if
  ## the input ends before the closing quote.
  lex.seenMultiline = false
  lex.startToken()
  lex.evaluated.setLen(0)
  if lex.propertyIndentation != -1:
    lex.indentation = lex.propertyIndentation
    lex.propertyIndentation = -1
  # start of the run of literal characters not yet copied to `evaluated`
  var literalStart = lex.source.bufpos
  lex.advance()
  while true:
    case lex.c
    of EndOfFile:
      raise lex.generateError("Unclosed quoted string")
    of '\\':
      lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
      lex.advance()
      literalStart = lex.source.bufpos
      case lex.c
      of '0':       lex.evaluated.add('\0')
      of 'a':       lex.evaluated.add('\a')
      of 'b':       lex.evaluated.add('\b')
      of 't', '\t': lex.evaluated.add('\t')
      of 'n':       lex.evaluated.add('\l')
      of 'v':       lex.evaluated.add('\v')
      of 'f':       lex.evaluated.add('\f')
      of 'r':       lex.evaluated.add('\c')
      of 'e':       lex.evaluated.add('\e')
      of ' ':       lex.evaluated.add(' ')
      of '"':       lex.evaluated.add('"')
      of '/':       lex.evaluated.add('/')
      of '\\':      lex.evaluated.add('\\')
      of 'N':       lex.evaluated.add(UTF8NextLine)
      of '_':       lex.evaluated.add(UTF8NonBreakingSpace)
      of 'L':       lex.evaluated.add(UTF8LineSeparator)
      of 'P':       lex.evaluated.add(UTF8ParagraphSeparator)
      of 'x':
        lex.readHexSequence(2)
        literalStart = lex.source.bufpos
      of 'u':
        lex.readHexSequence(4)
        literalStart = lex.source.bufpos
      of 'U':
        lex.readHexSequence(8)
        literalStart = lex.source.bufpos
      of '\l', '\c':
        # escaped line break: fold without counting this first break
        lex.processQuotedWhitespace(0)
        literalStart = lex.source.bufpos - 1
        continue
      else:
        raise lex.generateError("Illegal character in escape sequence: " & escape("" & lex.c))
    of '"':
      lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
      break
    of ' ', '\t', '\l', '\c':
      lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
      lex.processQuotedWhitespace(1)
      literalStart = lex.source.bufpos - 1
      continue
    else: discard
    lex.advance()
  # step over the closing quote
  lex.advance()
  lex.endToken()
  lex.cur = Token.DoubleQuoted
2020-10-29 23:34:28 +01:00
2020-11-03 21:17:31 +01:00
proc basicInit(lex: var Lexer) =
  ## Resets the lexer to its initial state (outside of any document, no
  ## pending properties, empty `evaluated`) and loads the first character
  ## of the already opened source.
  lex.state = outsideDoc
  lex.flowDepth = 0
  lex.lineStartState = outsideDoc
  lex.jsonEnablingState = afterToken
  lex.propertyIndentation = -1
  lex.evaluated = ""
  lex.advance()
# interface
proc lastScalarWasMultiline*(lex: Lexer): bool =
  ## Tells whether the most recently read plain or quoted scalar spanned
  ## more than one line.
  result = lex.seenMultiline
proc shortLexeme*(lex: Lexer): string =
  ## Source text from the position recorded by `startToken` up to, but not
  ## including, the current character.
  return lex.source.buf[lex.tokenStart..lex.source.bufpos-2]
proc fullLexeme*(lex: Lexer): string =
  ## Like `shortLexeme`, but additionally includes the character directly
  ## before the recorded token start.
  return lex.source.buf[lex.tokenStart - 1..lex.source.bufpos-2]
proc currentLine*(lex: Lexer): string =
  ## Text of the line the lexer is currently on, as returned by
  ## `BaseLexer.getCurrentLine` with its marker argument disabled.
  return lex.source.getCurrentLine(false)
proc next*(lex: var Lexer) {.raises: [LexerError].}=
  ## Lexes the next token. States are run until one of them reports that a
  ## token has been produced; the result is available in `lex.cur`,
  ## `lex.curStartPos`, `lex.curEndPos` and, where applicable,
  ## `lex.evaluated`.
  while not lex.state(lex): discard
  debug("lexer -> [" & $lex.curStartPos.line & "," & $lex.curStartPos.column &
    "-" & $lex.curEndPos.line & "," & $lex.curEndPos.column & "] " & $lex.cur)
2020-10-29 23:34:28 +01:00
2020-11-03 21:17:31 +01:00
proc init*(lex: var Lexer, source: Stream) {.raises: [IOError, OSError].} =
  ## Initializes the lexer to read YAML from the stream `source`.
  lex.source.open(source)
  lex.basicInit()
2020-10-29 23:34:28 +01:00
2020-11-03 21:17:31 +01:00
proc init*(lex: var Lexer, source: string) {.raises: [].} =
  ## Initializes the lexer to read YAML from the string `source`.
  try:
    lex.source.open(newStringStream(source))
  except CatchableError:
    discard # can never happen with StringStream
  lex.basicInit()
2016-09-10 12:38:03 +02:00
2020-10-29 23:34:28 +01:00
# states
2020-11-03 21:17:31 +01:00
proc outsideDoc(lex: var Lexer): bool =
  ## Lexer state at the start of a line outside of any document.
  ##
  ## Recognizes directives (`%...`), the markers `---` and `...`, and the
  ## implicit start of a document. Returns true if a token was produced and
  ## false if only the state changed (blank or comment-only line).
  case lex.c
  of '%':
    lex.startToken()
    while true:
      lex.advance()
      if lex.c in spaceOrLineEnd: break
    lex.endToken()
    let name = lex.shortLexeme()
    case name
    of "YAML":
      lex.state = yamlVersion
      lex.cur = Token.YamlDirective
    of "TAG":
      lex.state = tagShorthand
      lex.cur = Token.TagDirective
    else:
      lex.state = unknownDirParams
      lex.cur = Token.UnknownDirective
      lex.evaluated.setLen(0)
      lex.evaluated.add(name)
  of '-':
    lex.startToken()
    if lex.isDirectivesEnd():
      lex.state = afterToken
      lex.cur = Token.DirectivesEnd
    else:
      lex.state = indentationSettingToken
      lex.cur = Token.Indentation
    # either way a document has started
    lex.lineStartState = lineStart
    lex.indentation = -1
    lex.endToken()
  of '.':
    lex.startToken()
    if lex.isDocumentEnd():
      lex.state = expectLineEnd
      lex.cur = Token.DocumentEnd
    else:
      lex.state = indentationSettingToken
      lex.lineStartState = lineStart
      lex.indentation = -1
      lex.cur = Token.Indentation
    lex.endToken()
  else:
    lex.startToken()
    while lex.c == ' ': lex.advance()
    if lex.c in commentOrLineEnd:
      lex.state = expectLineEnd
      return false
    if lex.c == '\t':
      # tabs followed only by a comment or the line end do not start a
      # document
      var peek = lex.source.bufpos
      while lex.source.buf[peek] in space:
        peek += 1
      if lex.source.buf[peek] in commentOrLineEnd:
        lex.state = expectLineEnd
        lex.source.bufpos = peek
        return false
    lex.endToken()
    lex.cur = Token.Indentation
    lex.indentation = -1
    lex.state = indentationSettingToken
    lex.lineStartState = lineStart
  return true
2016-09-13 20:19:02 +02:00
2020-11-03 21:17:31 +01:00
proc yamlVersion(lex: var Lexer): bool =
  ## State after a `%YAML` directive name: reads `<number>.<number>` as one
  ## `DirectiveParam` token and switches to `expectLineEnd`.
  ##
  ## Raises a lexer error when the two numeric parts are not separated by
  ## `.` or when the version is not followed by a space or line end.
  template illegalVersionChar(): untyped =
    lex.generateError("Illegal character in YAML version string: " &
                      escape("" & lex.c))

  while lex.c in space: lex.advance()
  lex.startToken()

  # first numeric part, then the mandatory '.'
  lex.readNumericSubtoken()
  if lex.c != '.': raise illegalVersionChar()
  lex.advance()

  # second numeric part, which must end the parameter
  lex.readNumericSubtoken()
  if lex.c notin spaceOrLineEnd: raise illegalVersionChar()

  lex.cur = Token.DirectiveParam
  lex.endToken()
  lex.state = expectLineEnd
  result = true
2016-09-19 19:33:29 +02:00
2020-11-03 21:17:31 +01:00
proc tagShorthand(lex: var Lexer): bool =
  ## State after a `%TAG` directive name: reads the tag shorthand as a
  ## `TagHandle` token and switches to `tagUri`.
  ##
  ## The shorthand must start with `!`. Unless that `!` is directly followed
  ## by a space or line end, it must continue with shorthand characters,
  ## close with another `!` and then be followed by a space or line end;
  ## otherwise a lexer error is raised.
  while lex.c in space: lex.advance()
  if lex.c != '!':
    raise lex.generateError(
      "Illegal character, tag shorthand must start with '!': " &
      escape("" & lex.c))
  lex.startToken()
  lex.advance()

  if lex.c notin spaceOrLineEnd:
    while lex.c in tagShorthandChars: lex.advance()
    if lex.c in spaceOrLineEnd:
      raise lex.generateError("Tag shorthand must end with '!'.")
    elif lex.c != '!':
      raise lex.generateError("Illegal character in tag shorthand: " &
                              escape("" & lex.c))
    lex.advance()
    if lex.c notin spaceOrLineEnd:
      raise lex.generateError("Missing space after tag shorthand")

  lex.cur = Token.TagHandle
  lex.endToken()
  lex.state = tagUri
  result = true
2020-10-29 23:34:28 +01:00
2020-11-03 21:17:31 +01:00
proc tagUri(lex: var Lexer): bool =
  ## State after the shorthand of a `%TAG` directive: reads the URI via
  ## `readUri(false)`, emits it as a `Suffix` token and switches to
  ## `expectLineEnd`. A URI starting with `<` is rejected with a lexer error.
  while lex.c in space: lex.advance()
  lex.startToken()
  if lex.c == '<':
    raise lex.generateError("Illegal character in tag URI: " &
                            escape("" & lex.c))
  lex.readUri(false)
  lex.endToken()
  lex.cur = Token.Suffix
  lex.state = expectLineEnd
  result = true
2020-11-03 21:17:31 +01:00
proc unknownDirParams(lex: var Lexer): bool =
  ## State after the name of an unknown directive: reads the rest of the
  ## line, up to a line end or `#`, as one `DirectiveParam` token.
  ##
  ## Returns `false` and switches to `expectLineEnd` when nothing but a
  ## comment or the line end is left. Otherwise returns `true` and stays in
  ## this state, so the next call lands on the line end / `#` and takes the
  ## `expectLineEnd` path.
  while lex.c in space: lex.advance()
  if lex.c in lineEnd + {'#'}:
    lex.state = expectLineEnd
    return false
  lex.startToken()
  while true:
    lex.advance()
    if lex.c in lineEnd + {'#'}: break
  # Close the token. Every other token-producing state in this section pairs
  # `startToken()` with `endToken()`; without the call the end of this token
  # is never recorded.
  lex.endToken()
  lex.cur = Token.DirectiveParam
  return true
2020-11-03 21:17:31 +01:00
proc expectLineEnd(lex: var Lexer): bool =
  ## State in which only whitespace, a comment or the line end may follow.
  ## Skips whitespace, finishes the line via `endLine()` and always returns
  ## `false`; any other character raises a lexer error.
  while lex.c in space: lex.advance()
  if lex.c in commentOrLineEnd:
    lex.endLine()
  else:
    raise lex.generateError("Unexpected character (expected line end): " &
                            escape("" & lex.c))
  result = false
2020-11-03 21:17:31 +01:00
proc lineStart(lex: var Lexer): bool =
  ## State at the start of a line inside a document. Dispatches on the line
  ## kind reported by `lex.startLine()` and returns whatever the selected
  ## handler returns (`false` for comment, empty and stream-end lines).
  case lex.startLine()
  of lsDirectivesEndMarker:
    result = lex.lineDirEnd()
  of lsDocumentEndMarker:
    result = lex.lineDocEnd()
  of lsComment, lsNewline:
    lex.endLine()
    result = false
  of lsStreamEnd:
    lex.state = streamEnd
    result = false
  of lsContent:
    # content lines are checked differently inside flow collections
    if lex.flowDepth == 0:
      result = lex.lineIndentation()
    else:
      result = lex.flowLineIndentation()
2020-10-29 23:34:28 +01:00
2020-11-03 21:17:31 +01:00
proc flowLineStart(lex: var Lexer): bool =
  ## Line-start state used while inside a flow collection.
  ##
  ## Rejects `---` / `...` markers, measures the number of leading spaces
  ## and raises when it does not exceed `lex.indentation`. Lines holding
  ## only whitespace and/or a comment switch to `expectLineEnd`; all other
  ## lines switch to `insideLine`. Never emits a token (always `false`).
  var indent = 0 # stays 0 for lines starting with '-' or '.'
  case lex.c
  of '-':
    if lex.isDirectivesEnd():
      raise lex.generateError(
        "Directives end marker before end of flow content")
  of '.':
    if lex.isDocumentEnd():
      raise lex.generateError(
        "Document end marker before end of flow content")
  else:
    let startPos = lex.source.bufpos
    while lex.c == ' ': lex.advance()
    # only spaces count as indentation; the tabs skipped below do not
    indent = lex.source.bufpos - startPos
    while lex.c in space: lex.advance()
    if lex.c in commentOrLineEnd:
      lex.state = expectLineEnd
      return false
  if indent <= lex.indentation:
    raise lex.generateError(
      "Too few indentation spaces (must surpass surrounding block level)")
  lex.state = insideLine
  result = false
2020-11-03 21:17:31 +01:00
proc flowLineIndentation(lex: var Lexer): bool =
  ## Handles a content line inside a flow collection: raises when the
  ## current indentation is below `lex.indentation`, otherwise switches to
  ## `insideLine`. Never emits a token (always `false`).
  let indent = lex.currentIndentation()
  if indent < lex.indentation:
    raise lex.generateError(
      "Too few indentation spaces (must surpass surrounding block level)")
  lex.state = insideLine
  result = false
2020-11-03 21:17:31 +01:00
proc checkIndicatorChar(lex: var Lexer, kind: Token) =
  ## Decides whether the current character is an indicator or the start of
  ## a plain scalar. When `isPlainSafe()` holds, a plain scalar is read;
  ## otherwise the single character becomes a token of type `kind` and the
  ## lexer switches to `beforeIndentationSettingToken`.
  if lex.isPlainSafe():
    lex.readPlainScalar()
    return
  lex.startToken()
  lex.advance()
  lex.endToken()
  lex.state = beforeIndentationSettingToken
  lex.cur = kind
2020-11-03 21:17:31 +01:00
proc enterFlowCollection(lex: var Lexer, kind: Token) =
  ## Consumes the opening character of a flow collection and emits it as a
  ## token of type `kind`, increasing `flowDepth`.
  ##
  ## When this opens the outermost flow collection, the lexer additionally
  ## switches to the flow variants of the line-start and JSON-enabling
  ## states and resets `propertyIndentation`.
  lex.startToken()
  let enteringOutermost = lex.flowDepth == 0
  if enteringOutermost:
    lex.propertyIndentation = -1
    lex.lineStartState = flowLineStart
    lex.jsonEnablingState = afterJsonEnablingToken
  inc lex.flowDepth
  lex.state = afterToken
  lex.advance()
  lex.endToken()
  lex.cur = kind
2020-11-03 21:17:31 +01:00
proc leaveFlowCollection(lex: var Lexer, kind: Token) =
  ## Consumes the closing character of a flow collection and emits it as a
  ## token of type `kind`, decreasing `flowDepth`.
  ##
  ## Raises a lexer error when no flow collection is open. When the
  ## outermost collection is closed, the block variants of the line-start
  ## and JSON-enabling states are restored. The next state is whatever
  ## `jsonEnablingState` holds afterwards.
  lex.startToken()
  if lex.flowDepth == 0:
    raise lex.generateError("No flow collection to leave!")
  dec lex.flowDepth
  let leftOutermost = lex.flowDepth == 0
  if leftOutermost:
    lex.lineStartState = lineStart
    lex.jsonEnablingState = afterToken
  lex.state = lex.jsonEnablingState
  lex.advance()
  lex.endToken()
  lex.cur = kind
2020-11-03 21:17:31 +01:00
proc readNamespace(lex: var Lexer) =
  ## Reads the beginning of a tag (called from `insideLine` on `!`).
  ##
  ## `!<...>` is read via `readURI(true)` and emitted as `VerbatimTag`.
  ## Anything else is emitted as a `TagHandle`, with the lexer continuing
  ## in `atSuffix`.
  lex.startToken()
  lex.advance()
  if lex.c == '<':
    lex.readURI(true)
    lex.endToken()
    lex.cur = Token.VerbatimTag
    lex.state = afterToken
    return

  # Scan ahead in the buffer, without consuming, for a second '!' that
  # would close the handle.
  var handleStop = lex.tokenStart
  while true:
    case lex.source.buf[handleStop]
    of spaceOrLineEnd + flowIndicators:
      # No closing '!': the handle ends at the token start, and the read
      # position is moved back by one.
      handleStop = lex.tokenStart
      lex.source.bufpos -= 1
      break
    of '!':
      inc handleStop
      break
    else:
      inc handleStop
  # Consume up to the end of the handle, validating each character.
  while lex.source.bufpos < handleStop:
    lex.advance()
    if lex.c notin tagShorthandChars + {'!'}:
      raise lex.generateError("Illegal character in tag handle: " &
                              escape("" & lex.c))
  lex.advance()
  lex.endToken()
  lex.cur = Token.TagHandle
  lex.state = atSuffix
2020-11-03 21:17:31 +01:00
proc readAnchorName(lex: var Lexer) =
  ## Consumes an anchor or alias name up to the next space, line end or
  ## flow indicator and switches to `afterToken`. Raises a lexer error when
  ## the name is empty. The caller ends the token and sets `lex.cur`.
  lex.startToken()
  lex.advance()
  while lex.c notin spaceOrLineEnd + flowIndicators: lex.advance()
  # only the single mandatory advance happened: there is no name
  if lex.source.bufpos == lex.tokenStart + 1:
    raise lex.generateError("Anchor name must not be empty")
  lex.state = afterToken
2020-11-03 21:17:31 +01:00
proc insideLine(lex: var Lexer): bool =
  ## Main content state: reads the token starting at `lex.c`.
  ##
  ## Returns `true` after reading a token. Returns `false` when the line
  ## ends (comment or line end) or when only whitespace was skipped.
  ## Raises a lexer error for the reserved characters `@` and `` ` ``.
  result = true
  case lex.c
  of ':':
    lex.checkIndicatorChar(Token.MapValueInd)
    # After a real map value indicator, pending node properties define the
    # indentation.
    if lex.cur == Token.MapValueInd and lex.propertyIndentation != -1:
      lex.indentation = lex.propertyIndentation
      lex.propertyIndentation = -1
  of '?':
    lex.checkIndicatorChar(Token.MapKeyInd)
  of '-':
    lex.checkIndicatorChar(Token.SeqItemInd)
  of commentOrLineEnd:
    lex.endLine()
    return false
  of '"':
    lex.readDoubleQuotedScalar()
    lex.state = lex.jsonEnablingState
  of '\'':
    lex.readSingleQuotedScalar()
    lex.state = lex.jsonEnablingState
  of '>', '|':
    # inside flow collections these characters start a plain scalar
    if lex.flowDepth > 0:
      lex.readPlainScalar()
    else:
      lex.readBlockScalar()
  of '{':
    lex.enterFlowCollection(Token.MapStart)
  of '}':
    lex.leaveFlowCollection(Token.MapEnd)
  of '[':
    lex.enterFlowCollection(Token.SeqStart)
  of ']':
    lex.leaveFlowCollection(Token.SeqEnd)
  of ',':
    lex.startToken()
    lex.advance()
    lex.endToken()
    lex.cur = Token.SeqSep
    lex.state = afterToken
  of '!':
    lex.readNamespace()
  of '&', '*':
    # decide the kind before the name is consumed and `lex.c` moves on
    let kind = if lex.c == '&': Token.Anchor else: Token.Alias
    lex.readAnchorName()
    lex.endToken()
    lex.cur = kind
  of ' ', '\t':
    lex.advance()
    while lex.c in space: lex.advance()
    return false
  of '@', '`':
    raise lex.generateError("Reserved character may not start any token")
  else:
    lex.readPlainScalar()
2020-11-03 21:17:31 +01:00
proc indentationSettingToken(lex: var Lexer): bool =
  ## Like `insideLine`, but records the indentation of the line when a
  ## token was read outside of flow collections: for node property tokens
  ## in `propertyIndentation`, for everything else in `indentation`.
  # must be sampled before `insideLine` consumes input
  let lineIndent = lex.currentIndentation()
  result = lex.insideLine()
  if not result or lex.flowDepth != 0: return
  if lex.cur in nodePropertyKind:
    lex.propertyIndentation = lineIndent
  else:
    lex.indentation = lineIndent
2016-09-11 11:28:05 +02:00
2020-11-03 21:17:31 +01:00
proc afterToken(lex: var Lexer): bool =
  ## State after a token: skips whitespace, then either finishes the line
  ## (comment or line end) or switches to `insideLine`. Never emits a token.
  while lex.c in space: lex.advance()
  if lex.c notin commentOrLineEnd:
    lex.state = insideLine
  else:
    lex.endLine()
  result = false
2016-09-11 14:55:29 +02:00
2020-11-03 21:17:31 +01:00
proc beforeIndentationSettingToken(lex: var Lexer): bool =
  ## Same as `afterToken`, except that content following on the same line
  ## is read by `indentationSettingToken` instead of `insideLine`.
  ## Never emits a token.
  discard lex.afterToken()
  let contentFollows = lex.state == insideLine
  if contentFollows:
    lex.state = indentationSettingToken
  result = false
2016-09-11 18:23:47 +02:00
2020-11-03 21:17:31 +01:00
proc afterJsonEnablingToken(lex: var Lexer): bool =
  ## State installed as `jsonEnablingState` while inside flow collections.
  ##
  ## Skips spaces, comments and line breaks. A `:` found afterwards is
  ## emitted right away as `MapValueInd` (returns `true`). At the end of
  ## input the lexer switches to `streamEnd`; on any other character it
  ## switches to `insideLine` (both return `false`).
  while lex.c == ' ': lex.advance()
  while true:
    case lex.c
    of ':':
      lex.startToken()
      lex.advance()
      lex.endToken()
      lex.cur = Token.MapValueInd
      lex.state = afterToken
      return true
    of '#', '\l', '\c':
      # Finish this line and process the start of the next one, then look
      # at the current character again.
      lex.endLine()
      discard lex.flowLineStart()
    of EndOfFile:
      lex.state = streamEnd
      return false
    else:
      lex.state = insideLine
      return false
2016-09-11 11:28:05 +02:00
2020-11-03 21:17:31 +01:00
proc lineIndentation(lex: var Lexer): bool =
  ## Emits an `Indentation` token that starts at column 1 of the current
  ## line and switches to `indentationSettingToken`.
  lex.curStartPos.column = 1
  lex.curStartPos.line = lex.source.lineNumber
  lex.endToken()
  lex.state = indentationSettingToken
  lex.cur = Token.Indentation
  result = true
2016-09-11 11:28:05 +02:00
2020-11-03 21:17:31 +01:00
proc lineDirEnd(lex: var Lexer): bool =
  ## Emits a `DirectivesEnd` token that starts at column 1 of the current
  ## line, resets both indentation trackers to -1 and switches to
  ## `afterToken`.
  lex.curStartPos.column = 1
  lex.curStartPos.line = lex.source.lineNumber
  lex.endToken()
  lex.indentation = -1
  lex.propertyIndentation = -1
  lex.state = afterToken
  lex.cur = Token.DirectivesEnd
  result = true
2020-11-03 21:17:31 +01:00
proc lineDocEnd(lex: var Lexer): bool =
  ## Emits a `DocumentEnd` token that starts at column 1 of the current
  ## line. The rest of the line must be empty (`expectLineEnd`), and the
  ## following lines are lexed by `outsideDoc`.
  lex.curStartPos.column = 1
  lex.curStartPos.line = lex.source.lineNumber
  lex.endToken()
  lex.lineStartState = outsideDoc
  lex.state = expectLineEnd
  lex.cur = Token.DocumentEnd
  result = true
2020-11-03 21:17:31 +01:00
proc atSuffix(lex: var Lexer): bool =
  ## State after a tag handle: reads the tag suffix into `lex.evaluated`
  ## and emits a `Suffix` token, then switches to `afterToken`.
  ##
  ## URI characters are copied from the buffer in runs; each `%` is handed
  ## to `readHexSequence(2)`, after which a new run begins.
  lex.startToken()
  lex.evaluated.setLen(0)
  # buffer index where the current not-yet-copied run starts
  var runStart = lex.tokenStart - 1
  while true:
    if lex.c in uriChars:
      lex.advance()
    elif lex.c == '%':
      # copy the run in front of the escape, if there is one
      let runLast = lex.source.bufpos - 2
      if runStart <= runLast:
        lex.evaluated.add(lex.source.buf[runStart..runLast])
      lex.readHexSequence(2)
      runStart = lex.source.bufpos
      lex.advance()
    else:
      break
  # copy whatever is left after the last escape
  let tailLast = lex.source.bufpos - 2
  if runStart <= tailLast:
    lex.evaluated.add(lex.source.buf[runStart..tailLast])
  lex.endToken()
  lex.cur = Token.Suffix
  lex.state = afterToken
  result = true
2020-11-03 21:17:31 +01:00
proc streamEnd(lex: var Lexer): bool =
  ## Terminal state: emits an empty `StreamEnd` token on every call.
  lex.startToken()
  lex.endToken()
  lex.cur = Token.StreamEnd
  result = true