made lexer tests green again

Felix Krause 2020-11-04 19:32:09 +01:00
parent 4c604b09df
commit 2840d4d654
3 changed files with 123 additions and 105 deletions


@@ -2,17 +2,25 @@ import ../yaml/private/lex
 import unittest, strutils
 
-const tokensWithValue =
-  {Token.Plain, Token.SingleQuoted, Token.DoubleQuoted, Token.Literal,
-   Token.Folded, Token.DirectiveParam,
-   Token.TagHandle, Token.Suffix, Token.VerbatimTag,
-   Token.UnknownDirective, Token.Anchor, Token.Alias}
+const
+  tokensWithValue =
+    {Token.Plain, Token.SingleQuoted, Token.DoubleQuoted, Token.Literal,
+     Token.Folded, Token.Suffix, Token.VerbatimTag,
+     Token.UnknownDirective}
+  tokensWithFullLexeme =
+    {Token.DirectiveParam, Token.TagHandle}
+  tokensWithShortLexeme = {Token.Anchor, Token.Alias}
 
 type
   TokenWithValue = object
     case kind: Token
     of tokensWithValue:
       value: string
+    of tokensWithFullLexeme:
+      lexeme: string
+    of tokensWithShortLexeme:
+      slexeme: string
     of Indentation:
       indentation: int
     else: discard
@@ -23,7 +31,7 @@ proc actualRepr(lex: Lexer, t: Token): string =
   of tokensWithValue + {Token.TagHandle}:
     result.add("(" & escape(lex.evaluated) & ")")
   of Indentation:
-    result.add("(" & $lex.indentation & ")")
+    result.add("(" & $lex.currentIndentation() & ")")
   else: discard
 
 proc assertEquals(input: string, expected: varargs[TokenWithValue]) =
@@ -43,14 +51,22 @@ proc assertEquals(input: string, expected: varargs[TokenWithValue]) =
       doAssert lex.evaluated == expectedToken.value, "Wrong token content at #" &
           $i & ": Expected " & escape(expectedToken.value) &
           ", got " & escape(lex.evaluated)
+    of tokensWithFullLexeme:
+      doAssert lex.fullLexeme() == expectedToken.lexeme, "Wrong token lexeme at #" &
+          $i & ": Expected" & escape(expectedToken.lexeme) &
+          ", got " & escape(lex.fullLexeme())
+    of tokensWithShortLexeme:
+      doAssert lex.shortLexeme() == expectedToken.slexeme, "Wrong token slexeme at #" &
+          $i & ": Expected" & escape(expectedToken.slexeme) &
+          ", got " & escape(lex.shortLexeme())
     of Indentation:
-      doAssert lex.indentation == expectedToken.indentation,
+      doAssert lex.currentIndentation() == expectedToken.indentation,
           "Wrong indentation length at #" & $i & ": Expected " &
-          $expectedToken.indentation & ", got " & $lex.indentation
+          $expectedToken.indentation & ", got " & $lex.currentIndentation()
     else: discard
   except LexerError:
     let e = (ref LexerError)(getCurrentException())
-    echo "Error at line " & $e.line & ", column " & $e.column & ":"
+    echo "Error at line", e.line, ", column", e.column, ":", e.msg
     echo e.lineContent
     assert false
@@ -71,9 +87,9 @@ proc dt(): TokenWithValue = TokenWithValue(kind: Token.TagDirective)
 proc du(v: string): TokenWithValue =
   TokenWithValue(kind: Token.UnknownDirective, value: v)
 proc dp(v: string): TokenWithValue =
-  TokenWithValue(kind: Token.DirectiveParam, value: v)
+  TokenWithValue(kind: Token.DirectiveParam, lexeme: v)
 proc th(v: string): TokenWithValue =
-  TokenWithValue(kind: Token.TagHandle, value: v)
+  TokenWithValue(kind: Token.TagHandle, lexeme: v)
 proc ts(v: string): TokenWithValue =
   TokenWithValue(kind: Token.Suffix, value: v)
 proc tv(v: string): TokenWithValue =
@@ -87,8 +103,8 @@ proc se(): TokenWithValue = TokenWithValue(kind: Token.SeqEnd)
 proc ms(): TokenWithValue = TokenWithValue(kind: Token.MapStart)
 proc me(): TokenWithValue = TokenWithValue(kind: Token.MapEnd)
 proc sep(): TokenWithValue = TokenWithValue(kind: Token.SeqSep)
-proc an(v: string): TokenWithValue = TokenWithValue(kind: Token.Anchor, value: v)
-proc al(v: string): TokenWithValue = TokenWithValue(kind: Token.Alias, value: v)
+proc an(v: string): TokenWithValue = TokenWithValue(kind: Token.Anchor, slexeme: v)
+proc al(v: string): TokenWithValue = TokenWithValue(kind: Token.Alias, slexeme: v)
 
 suite "Lexer":
   test "Empty document":
@ -133,11 +149,11 @@ suite "Lexer":
test "Directives": test "Directives":
assertEquals("%YAML 1.2\n---\n%TAG\n...\n\n%TAG ! example.html", assertEquals("%YAML 1.2\n---\n%TAG\n...\n\n%TAG ! example.html",
dy(), dp("1.2"), dirE(), i(0), pl("%TAG"), i(0), docE(), dt(), dy(), dp("1.2"), dirE(), i(0), pl("%TAG"), docE(), dt(),
th("!"), ts("example.html"), e()) th("!"), ts("example.html"), e())
test "Markers and Unknown Directive": test "Markers and Unknown Directive":
assertEquals("---\n---\n...\n%UNKNOWN warbl", dirE(), dirE(), i(0), assertEquals("---\n---\n...\n%UNKNOWN warbl", dirE(), dirE(),
docE(), du("UNKNOWN"), dp("warbl"), e()) docE(), du("UNKNOWN"), dp("warbl"), e())
test "Block scalar": test "Block scalar":
@ -145,7 +161,7 @@ suite "Lexer":
test "Block Scalars": test "Block Scalars":
assertEquals("one : >2-\l foo\l bar\ltwo: |+\l bar\l baz", i(0), assertEquals("one : >2-\l foo\l bar\ltwo: |+\l bar\l baz", i(0),
pl("one"), mv(), fs(" foo\lbar"), i(0), pl("two"), mv(), pl("one"), mv(), fs(" foo bar"), i(0), pl("two"), mv(),
ls("bar\l baz"), e()) ls("bar\l baz"), e())
test "Flow indicators": test "Flow indicators":
@ -153,7 +169,7 @@ suite "Lexer":
mv(), pl("d"), sep(), ss(), pl("e"), se(), mv(), pl("f"), me(), e()) mv(), pl("d"), sep(), ss(), pl("e"), se(), mv(), pl("f"), me(), e())
test "Adjacent map values in flow style": test "Adjacent map values in flow style":
assertEquals("{\"foo\":bar, [1]\l:egg}", i(0), ms(), dq("foo"), mv(), assertEquals("{\"foo\":bar, [1]\l :egg}", i(0), ms(), dq("foo"), mv(),
pl("bar"), sep(), ss(), pl("1"), se(), mv(), pl("egg"), me(), e()) pl("bar"), sep(), ss(), pl("1"), se(), mv(), pl("egg"), me(), e())
test "Tag handles": test "Tag handles":


@@ -273,7 +273,7 @@ proc beforeImplicitRoot(c: Context, e: var Event): bool =
   if c.lex.cur != Token.Indentation:
     raise c.generateError("Unexpected token (expected line start): " & $c.lex.cur)
   c.inlineStart = c.lex.curEndPos
-  c.levels[^1].indentation = c.lex.indentation
+  c.levels[^1].indentation = c.lex.currentIndentation()
   c.lex.next()
   case c.lex.cur
   of SeqItemInd, MapKeyInd, MapValueInd:
@@ -292,7 +292,7 @@ proc beforeImplicitRoot(c: Context, e: var Event): bool =
     raise c.generateError("Unexpected token (expected collection start): " & $c.lex.cur)
 
 proc requireImplicitMapStart(c: Context, e: var Event): bool =
-  c.levels[^1].indentation = c.lex.indentation
+  c.levels[^1].indentation = c.lex.currentIndentation()
   case c.lex.cur
   of Alias:
     e = aliasEvent(c.lex.shortLexeme().Anchor, c.inlineStart, c.lex.curEndPos)
@@ -346,7 +346,7 @@ proc atBlockIndentation(c: Context, e: var Event): bool =
       discard c.levels.pop()
       return true
   c.inlineStart = c.lex.curStartPos
-  c.levels[^1].indentation = c.lex.indentation
+  c.levels[^1].indentation = c.lex.currentIndentation()
   case c.lex.cur
   of nodePropertyKind:
     if isEmpty(c.headerProps):
@@ -359,9 +359,9 @@ proc atBlockIndentation(c: Context, e: var Event): bool =
     e = startSeqEvent(csBlock, c.headerProps,
        c.headerStart, c.lex.curEndPos)
     c.headerProps = defaultProperties
-    c.levels[^1] = Level(state: inBlockSeq, indentation: c.lex.indentation)
+    c.levels[^1] = Level(state: inBlockSeq, indentation: c.lex.currentIndentation())
     c.levels.add(Level(state: beforeBlockIndentation, indentation: 0))
-    c.levels.add(Level(state: afterCompactParent, indentation: c.lex.indentation))
+    c.levels.add(Level(state: afterCompactParent, indentation: c.lex.currentIndentation()))
     c.lex.next()
     return true
   of MapKeyInd:
@@ -370,10 +370,10 @@ proc atBlockIndentation(c: Context, e: var Event): bool =
     c.headerProps = defaultProperties
     c.levels[^1] = Level(state: beforeBlockMapValue, indentation: 0)
     c.levels.add(Level(state: beforeBlockIndentation))
-    c.levels.add(Level(state: afterCompactParent, indentation: c.lex.indentation))
+    c.levels.add(Level(state: afterCompactParent, indentation: c.lex.currentIndentation()))
     c.lex.next()
   of Plain, SingleQuoted, DoubleQuoted:
-    c.levels[^1].indentation = c.lex.indentation
+    c.levels[^1].indentation = c.lex.currentIndentation()
     e = scalarEvent(c.lex.evaluated, c.headerProps,
        toStyle(c.lex.cur), c.inlineStart, c.lex.curEndPos)
     c.headerProps = defaultProperties
@@ -409,7 +409,7 @@ proc atBlockIndentation(c: Context, e: var Event): bool =
     c.levels[^1].state = atBlockIndentationProps
 
 proc atBlockIndentationProps(c: Context, e: var Event): bool =
-  c.levels[^1].indentation = c.lex.indentation
+  c.levels[^1].indentation = c.lex.currentIndentation()
   case c.lex.cur
   of MapValueInd:
     c.peek = scalarEvent("", c.inlineProps, ssPlain, c.inlineStart, c.lex.curEndPos)
@@ -487,7 +487,7 @@ proc afterCompactParent(c: Context, e: var Event): bool =
   of SeqItemInd:
     e = startSeqEvent(csBlock, c.headerProps, c.headerStart, c.lex.curEndPos)
     c.headerProps = defaultProperties
-    c.levels[^1] = Level(state: inBlockSeq, indentation: c.lex.indentation)
+    c.levels[^1] = Level(state: inBlockSeq, indentation: c.lex.currentIndentation())
     c.levels.add(Level(state: beforeBlockIndentation))
     c.levels.add(Level(state: afterCompactParent))
     c.lex.next()
@@ -495,7 +495,7 @@ proc afterCompactParent(c: Context, e: var Event): bool =
   of MapKeyInd:
     e = startMapEvent(csBlock, c.headerProps, c.headerStart, c.lex.curEndPos)
     c.headerProps = defaultProperties
-    c.levels[^1] = Level(state: beforeBlockMapValue, indentation: c.lex.indentation)
+    c.levels[^1] = Level(state: beforeBlockMapValue, indentation: c.lex.currentIndentation())
     c.levels.add(Level(state: beforeBlockIndentation))
     c.levels.add(Level(state: afterCompactParent))
     return true
@@ -504,7 +504,7 @@ proc afterCompactParent(c: Context, e: var Event): bool =
     return false
 
 proc afterCompactParentProps(c: Context, e: var Event): bool =
-  c.levels[^1].indentation = c.lex.indentation
+  c.levels[^1].indentation = c.lex.currentIndentation()
   case c.lex.cur
   of nodePropertyKind:
     c.levels.add(Level(state: beforeNodeProperties))
@@ -541,7 +541,7 @@ proc afterCompactParentProps(c: Context, e: var Event): bool =
        c.inlineStart, c.lex.curEndPos)
     c.inlineProps = defaultProperties
     let headerEnd = c.lex.curStartPos
-    c.levels[^1].indentation = c.lex.indentation
+    c.levels[^1].indentation = c.lex.currentIndentation()
     c.lex.next()
     if c.lex.cur == Token.MapValueInd:
       if c.lex.lastScalarWasMultiline():
@@ -580,7 +580,7 @@ proc afterBlockParent(c: Context, e: var Event): bool =
     return false
 
 proc afterBlockParentProps(c: Context, e: var Event): bool =
-  c.levels[^1].indentation = c.lex.indentation
+  c.levels[^1].indentation = c.lex.currentIndentation()
   case c.lex.cur
   of nodePropertyKind:
     c.levels.add(Level(state: beforeNodeProperties))
@@ -600,7 +600,7 @@ proc afterBlockParentProps(c: Context, e: var Event): bool =
     return false
 
 proc requireInlineBlockItem(c: Context, e: var Event): bool =
-  c.levels[^1].indentation = c.lex.indentation
+  c.levels[^1].indentation = c.lex.currentIndentation()
   case c.lex.cur
   of Indentation:
     raise c.generateError("Node properties may not stand alone on a line")
@@ -740,7 +740,7 @@ proc beforeBlockIndentation(c: Context, e: var Event): bool =
     discard c.levels.pop()
   case c.lex.cur
   of Indentation:
-    c.blockIndentation = c.lex.indentation
+    c.blockIndentation = c.lex.currentIndentation()
     if c.blockIndentation < c.levels[^1].indentation:
       endBlockNode(e)
       return true


@@ -16,10 +16,8 @@ type
     curStartPos*, curEndPos*: Mark
 
     # recently read scalar or URI, if any
    evaluated*: string
 
-    # ltIndentation
-    indentation*: int
-
     # internals
+    indentation: int
     source: BaseLexer
     tokenStart: int
     flowDepth: int
@@ -75,7 +73,6 @@ const
   spaceOrLineEnd = {' ', '\t', '\l', '\c', EndOfFile}
   commentOrLineEnd = {'\l', '\c', EndOfFile, '#'}
   digits = {'0'..'9'}
-  hexDigits = {'0'..'9', 'a'..'f', 'A'..'F'}
   flowIndicators = {'[', ']', '{', '}', ','}
   uriChars = {'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', ':',
               '@', '&', '-', '=', '+', '$', '_', '.', '~', '*', '\'', '(', ')'}
@@ -93,41 +90,44 @@ const
   UnknownIndentation* = int.low
 
+proc currentIndentation*(lex: Lexer): Natural =
+  return lex.source.getColNumber(lex.source.bufpos) - 1
+
 # lexer source handling
 
 proc advance(lex: var Lexer, step: int = 1) {.inline.} =
-  lex.source.bufpos.inc(step)
   lex.c = lex.source.buf[lex.source.bufpos]
+  lex.source.bufpos.inc(step)
 
 template lexCR(lex: var Lexer) =
-  try: lex.source.bufpos = lex.source.handleCR(lex.source.bufpos)
+  try: lex.source.bufpos = lex.source.handleCR(lex.source.bufpos - 1)
   except:
    var e = lex.generateError("Encountered stream error: " &
        getCurrentExceptionMsg())
    e.parent = getCurrentException()
    raise e
-  lex.c = lex.source.buf[lex.source.bufpos]
+  lex.advance()
 
 template lexLF(lex: var Lexer) =
-  try: lex.source.bufpos = lex.source.handleLF(lex.source.bufpos)
+  try: lex.source.bufpos = lex.source.handleLF(lex.source.bufpos - 1)
   except:
    var e = generateError(lex, "Encountered stream error: " &
        getCurrentExceptionMsg())
    e.parent = getCurrentException()
    raise e
-  lex.c = lex.source.buf[lex.source.bufpos]
+  lex.advance()
 
 template lineNumber(lex: Lexer): Positive =
   lex.source.lineNumber
 
 template columnNumber(lex: Lexer): Positive =
-  lex.source.getColNumber(lex.source.bufpos) + 1
+  lex.source.getColNumber(lex.source.bufpos)
 
 template currentLine(lex: Lexer): string =
   lex.source.getCurrentLine(true)
 
 proc isPlainSafe(lex: Lexer): bool {.inline.} =
-  case lex.source.buf[lex.source.bufpos + 1]
+  case lex.source.buf[lex.source.bufpos]
   of spaceOrLineEnd: result = false
   of flowIndicators: result = lex.flowDepth == 0
   else: result = true
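
Note: the new currentIndentation proc and the reordered advance rest on a cursor convention: advance loads the character at bufpos into c and then moves bufpos one past it, so the column of the current character is getColNumber(bufpos) - 1. The same convention explains why handleCR/handleLF now receive bufpos - 1 and why isPlainSafe peeks at buf[bufpos] instead of buf[bufpos + 1]. A self-contained sketch of that invariant; MiniLexer and its procs are illustrative, only BaseLexer and getColNumber are the real std/lexbase API:

import lexbase, streams

type MiniLexer = object
  source: BaseLexer
  c: char

proc advance(lex: var MiniLexer) =
  # load the character under the cursor, then move the cursor one past it
  lex.c = lex.source.buf[lex.source.bufpos]
  lex.source.bufpos.inc()

proc currentIndentation(lex: MiniLexer): int =
  # column of the character currently held in `c`
  lex.source.getColNumber(lex.source.bufpos) - 1

when isMainModule:
  var lex = MiniLexer()
  lex.source.open(newStringStream("  foo"))
  lex.advance()   # c = ' '
  lex.advance()   # c = ' '
  lex.advance()   # c = 'f'
  echo lex.currentIndentation()   # 2: 'f' sits after two spaces of indentation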
@@ -218,26 +218,22 @@ proc isDocumentEnd(lex: var Lexer): bool =
 proc readHexSequence(lex: var Lexer, len: int) =
   var charPos = 0
-  let startPos = lex.source.bufpos
   for i in countup(0, len-1):
-    if lex.source.buf[startPos + 1] notin hexDigits:
-      raise lex.generateError("Invalid character in hex escape sequence: " &
-          escape("" & lex.source.buf[startPos + i]))
-  # no pow() for ints, do it manually
-  var coeff = 1
-  for exponent in countup(0, len-1): coeff *= 16
-  for exponent in countdown(len-1, 0):
     lex.advance()
+    let digitPosition = len - i - 1
     case lex.c
-    of digits:
-      charPos += coeff * (int(lex.c) - int('0'))
-    of 'a' .. 'f':
-      charPos += coeff * (int(lex.c) - int('a') + 10)
-    of 'A' .. 'F':
-      charPos += coeff * (int(lex.c) - int('A') + 10)
-    else: discard # cannot happen, we checked
-    coeff = coeff div 16
-  lex.evaluated.add($Rune(charPos))
+    of lineEnd:
+      raise lex.generateError("Unfinished unicode escape sequence")
+    of '0'..'9':
+      charPos = charPos or (int(lex.c) - 0x30) shl (digitPosition * 4)
+    of 'A' .. 'F':
+      charPos = charPos or (int(lex.c) - 0x37) shl (digitPosition * 4)
+    of 'a' .. 'f':
+      charPos = charPos or (int(lex.c) - 0x57) shl (digitPosition * 4)
+    else:
+      raise lex.generateError("Invalid character in hex escape sequence: " &
+          escape("" & lex.c))
+  lex.evaluated.add(toUTF8(Rune(charPos)))
 
 proc readURI(lex: var Lexer) =
   lex.evaluated.setLen(0)
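
Note: the rewritten readHexSequence assembles the code point nibble by nibble, OR-ing each hex digit in at its position (digitPosition * 4 bits) and appending the result as UTF-8, which replaces the old coefficient loop. A standalone sketch of the same decoding idea; decodeHex is illustrative, not the lexer's API:

import unicode

proc decodeHex(s: string): string =
  # e.g. "263A" -> Rune(0x263A) -> its UTF-8 bytes
  var codePoint = 0
  for i, ch in s:
    let digitPosition = s.len - i - 1
    case ch
    of '0'..'9': codePoint = codePoint or (int(ch) - 0x30) shl (digitPosition * 4)
    of 'A'..'F': codePoint = codePoint or (int(ch) - 0x37) shl (digitPosition * 4)
    of 'a'..'f': codePoint = codePoint or (int(ch) - 0x57) shl (digitPosition * 4)
    else: raise newException(ValueError, "invalid hex digit: " & ch)
  result = toUTF8(Rune(codePoint))

when isMainModule:
  assert decodeHex("41") == "A"       # \x41
  assert decodeHex("263A") == "☺"     # \u263A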
@@ -383,7 +379,7 @@ proc readPlainScalar(lex: var Lexer) =
           break inlineLoop
         of EndOfFile:
           lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2])
-          if lex.columnNumber() > 0:
+          if lex.currentIndentation() > 0:
             lex.endToken()
           lex.state = streamEnd
           break multilineLoop
@@ -394,7 +390,7 @@ proc readPlainScalar(lex: var Lexer) =
         while true:
           case lex.startLine()
           of lsContent:
-            if lex.columnNumber() <= lex.indentation:
+            if lex.currentIndentation() <= lex.indentation:
               lex.state = afterNewlineState
               break multilineLoop
             break newlineLoop
@@ -412,6 +408,7 @@ proc readPlainScalar(lex: var Lexer) =
               break multilineLoop
           of lsNewline: lex.endLine()
           newlines += 1
+          while lex.c == ' ': lex.advance()
           if (lex.c == ':' and not lex.isPlainSafe()) or
               lex.c == '#' or (lex.c in flowIndicators and
               lex.flowDepth > 0):
@@ -423,7 +420,7 @@ proc readPlainScalar(lex: var Lexer) =
     for i in countup(2, newlines): lex.evaluated.add('\l')
 
 proc streamEndAfterBlock(lex: var Lexer) =
-  if lex.columnNumber() != 0:
+  if lex.currentIndentation() != 0:
     lex.endToken()
     lex.curEndPos.column -= 1
@@ -475,13 +472,13 @@ proc readBlockScalar(lex: var Lexer) =
       if indent == 0:
         while lex.c == ' ': lex.advance()
       else:
-        maxLeadingSpaces = lex.columnNumber + indent
-        while lex.c == ' ' and lex.columnNumber < maxLeadingSpaces:
+        maxLeadingSpaces = lex.currentIndentation() + indent
+        while lex.c == ' ' and lex.currentIndentation() < maxLeadingSpaces:
          lex.advance()
       case lex.c
       of '\l', '\c':
         lex.endToken()
-        maxLeadingSpaces = max(maxLeadingSpaces, lex.columnNumber())
+        maxLeadingSpaces = max(maxLeadingSpaces, lex.currentIndentation())
         lex.endLine()
         separationLines += 1
       of EndOfFile:
@@ -490,59 +487,60 @@ proc readBlockScalar(lex: var Lexer) =
           break body
       else:
         if indent == 0:
-          indent = lex.columnNumber()
+          indent = lex.currentIndentation()
           if indent <= max(0, lex.indentation):
             lex.state = lineIndentation
             break body
         elif indent < maxLeadingSpaces:
           raise lex.generateError("Leading all-spaces line contains too many spaces")
-        elif lex.columnNumber < indent: break body
+        elif lex.currentIndentation() < indent: break body
         break
 
     for i in countup(0, separationLines - 1):
       lex.evaluated.add('\l')
 
     block content:
-      contentStart = lex.source.bufpos - 1
-      while lex.c notin lineEnd: lex.advance()
-      lex.evaluated.add(lex.source.buf[contentStart .. lex.source.bufpos - 2])
-      separationLines = 0
-      if lex.c == EndOfFile:
-        lex.state = streamEnd
-        lex.streamEndAfterBlock()
-        break body
-      separationLines += 1
-      lex.endToken()
-      lex.endLine()
-
-      # empty lines and indentation of next line
       while true:
-        while lex.c == ' ' and lex.columnNumber() < indent:
-          lex.advance()
-        case lex.c
-        of '\l', '\c':
-          lex.endToken()
-          separationLines += 1
-          lex.endLine()
-        of EndOfFile:
+        contentStart = lex.source.bufpos - 1
+        while lex.c notin lineEnd: lex.advance()
+        lex.evaluated.add(lex.source.buf[contentStart .. lex.source.bufpos - 2])
+        separationLines = 0
+        if lex.c == EndOfFile:
          lex.state = streamEnd
          lex.streamEndAfterBlock()
          break body
-        else:
-          if lex.columnNumber() < indent:
-            break content
-          else: break
-
-      # line folding
-      if lex.cur == Token.Literal:
-        for i in countup(0, separationLines - 1):
-          lex.evaluated.add('\l')
-      elif separationLines == 1:
-        lex.evaluated.add(' ')
-      else:
-        for i in countup(0, separationLines - 2):
-          lex.evaluated.add('\l')
-
-    if lex.columnNumber() > max(0, lex.indentation):
+        separationLines += 1
+        lex.endToken()
+        lex.endLine()
+
+        # empty lines and indentation of next line
+        while true:
+          while lex.c == ' ' and lex.currentIndentation() < indent:
+            lex.advance()
+          case lex.c
+          of '\l', '\c':
+            lex.endToken()
+            separationLines += 1
+            lex.endLine()
+          of EndOfFile:
+            lex.state = streamEnd
+            lex.streamEndAfterBlock()
+            break body
+          else:
+            if lex.currentIndentation() < indent:
+              break content
+            else: break
+
+        # line folding
+        if lex.cur == Token.Literal:
+          for i in countup(0, separationLines - 1):
+            lex.evaluated.add('\l')
+        elif separationLines == 1:
+          lex.evaluated.add(' ')
+        else:
+          for i in countup(0, separationLines - 2):
+            lex.evaluated.add('\l')
+
+    if lex.currentIndentation() > max(0, lex.indentation):
       if lex.c == '#':
         lex.state = expectLineEnd
       else:
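
Note: the content loop above applies the folding rule that the changed "Block Scalars" test now expects (fs(" foo bar")): a literal scalar (|) keeps every line break, while a folded scalar (>) turns a single break between content lines into a space and n breaks into n-1 newlines. A tiny sketch of just that rule; foldSeparator is illustrative, not the lexer's API:

import strutils

proc foldSeparator(literal: bool, separationLines: int): string =
  # text inserted between two content lines of a block scalar
  if literal:
    result = repeat('\n', separationLines)
  elif separationLines == 1:
    result = " "
  else:
    result = repeat('\n', separationLines - 1)

when isMainModule:
  assert foldSeparator(false, 1) == " "      # "foo\nbar"  ->  "foo bar"
  assert foldSeparator(false, 3) == "\n\n"   # blank lines survive as newlines
  assert foldSeparator(true, 1) == "\n"      # literal keeps the break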
@@ -755,7 +753,7 @@ proc outsideDoc(lex: var Lexer): bool =
     lex.startToken()
     if lex.isDirectivesEnd():
       lex.state = expectLineEnd
-      lex.cur = Token.DocumentEnd
+      lex.cur = Token.DirectivesEnd
     else:
       lex.state = indentationSettingToken
       lex.cur = Token.Indentation
@@ -799,6 +797,7 @@ proc yamlVersion(lex: var Lexer): bool =
   lex.cur = Token.DirectiveParam
   lex.endToken()
   lex.state = expectLineEnd
+  return true
 
 proc tagShorthand(lex: var Lexer): bool =
   debug("lex: tagShorthand")
@@ -822,6 +821,7 @@ proc tagShorthand(lex: var Lexer): bool =
   lex.cur = Token.TagHandle
   lex.endToken()
   lex.state = tagUri
+  return true
 
 proc tagUri(lex: var Lexer): bool =
   debug("lex: tagUri")
@@ -886,7 +886,7 @@ proc flowLineStart(lex: var Lexer): bool =
   return false
 
 proc flowLineIndentation(lex: var Lexer): bool =
-  if lex.columnNumber() < lex.indentation:
+  if lex.currentIndentation() < lex.indentation:
     raise lex.generateError("Too few indentation spaces (must surpass surrounding block level)")
   lex.state = insideLine
   return false
@@ -933,6 +933,7 @@ proc readNamespace(lex: var Lexer) =
     lex.readURI()
     lex.endToken()
     lex.cur = Token.VerbatimTag
+    lex.state = afterToken
   else:
     var handleEnd = lex.tokenStart
     while true:
@@ -1022,9 +1023,9 @@ proc insideLine(lex: var Lexer): bool =
     return true
 
 proc indentationSettingToken(lex: var Lexer): bool =
-  let cachedIntentation = lex.columnNumber()
+  let cachedIntentation = lex.currentIndentation()
   result = lex.insideLine()
-  if result and lex.flowDepth > 0:
+  if result and lex.flowDepth == 0:
     if lex.cur in nodePropertyKind:
       lex.propertyIndentation = cachedIntentation
     else:
@@ -1054,6 +1055,7 @@ proc afterJsonEnablingToken(lex: var Lexer): bool =
     lex.endToken()
     lex.cur = Token.MapValueInd
     lex.state = afterToken
+    return true
   of '#', '\l', '\c':
     lex.endLine()
     discard lex.flowLineStart()
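
Note: the added `return true` lines in yamlVersion, tagShorthand and afterJsonEnablingToken matter because Nim zero-initialises `result`, i.e. to false for a bool: a state proc that sets up a token but falls off the end reports "no token produced", so the driving loop runs the next state and the token gets overwritten. A sketch of that contract with illustrative names, not the library's actual driver:

type
  MiniLex = object
    cur: string
    state: proc (lex: var MiniLex): bool {.nimcall.}

proc expectLineEndish(lex: var MiniLex): bool =
  lex.cur = "(line end)"
  return true

proc yamlVersionish(lex: var MiniLex): bool =
  lex.cur = "DirectiveParam"
  lex.state = expectLineEndish
  return true   # without this, result stays false and next() keeps going

proc next(lex: var MiniLex) =
  # run states until one reports that it produced a token
  while true:
    let stateProc = lex.state
    if stateProc(lex): break

when isMainModule:
  var lex = MiniLex(state: yamlVersionish)
  lex.next()
  echo lex.cur   # DirectiveParam, not "(line end)"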