lexer: worked on block scalars and stuff

This commit is contained in:
Felix Krause 2016-09-10 17:19:37 +02:00
parent 79f432a27d
commit ba02b41555
2 changed files with 246 additions and 77 deletions

View File

@ -5,6 +5,9 @@
# distribution, for details about the copyright. # distribution, for details about the copyright.
import lexbase, streams, strutils, unicode import lexbase, streams, strutils, unicode
when defined(yamlDebug):
import terminal
export terminal
type type
StringSource* = object StringSource* = object
@ -22,24 +25,30 @@ type
source: T source: T
inFlow: bool inFlow: bool
literalEndIndent: int literalEndIndent: int
nextImpl, stored: LexerState[T] nextImpl, lineStartImpl, inlineImpl: LexerState[T]
buf*: string not nil buf*: string not nil
indentation*: int indentation*: int
blockScalarIndent: int
moreIndented*, folded*: bool
chomp*: ChompType
c: char c: char
LexerState[T] = proc(lex: YamlLexer[T], t: var LexerToken): bool LexerState[T] = proc(lex: YamlLexer[T], t: var LexerToken): bool
LexerToken* = enum LexerToken* = enum
ltYamlDirective, ltYamlVersion, ltTagDirective, ltTagShorthand, ltYamlDirective, ltYamlVersion, ltTagDirective, ltTagShorthand,
ltTagUrl, ltUnknownDirective, ltUnknownDirectiveParams, ltTagUri, ltUnknownDirective, ltUnknownDirectiveParams, ltEmptyLine,
ltDirectivesEnd, ltDocumentEnd, ltStreamEnd, ltIndentation, ltQuotedScalar, ltDirectivesEnd, ltDocumentEnd, ltStreamEnd, ltIndentation, ltQuotedScalar,
ltScalarPart, ltEmptyLine, ltSeqItemInd, ltMapKeyInd, ltMapValInd, ltScalarPart, ltBlockScalarHeader, ltSeqItemInd, ltMapKeyInd, ltMapValInd,
ltBraceOpen, ltBraceClose, ltBracketOpen, ltBracketClose, ltComma, ltBraceOpen, ltBraceClose, ltBracketOpen, ltBracketClose, ltComma,
ltLiteralTag, ltTagSuffix, ltAnchor, ltAlias ltLiteralTag, ltTagSuffix, ltAnchor, ltAlias
YamlLexerError* = object of Exception YamlLexerError* = object of Exception
line, column: int line*, column*: int
lineContent: string lineContent*: string
ChompType* = enum
ctKeep, ctClip, ctStrip
# templates # templates
@ -52,6 +61,14 @@ proc advance(lex: YamlLexer[StringSource], step: int = 1) {.inline.} =
if lex.source.pos >= lex.source.src.len: lex.c = EndOfFile if lex.source.pos >= lex.source.src.len: lex.c = EndOfFile
else: lex.c = lex.source.src[lex.source.pos] else: lex.c = lex.source.src[lex.source.pos]
proc peek(lex: YamlLexer[StringSource], at: int = 1): char {.inline.} =
let index = lex.source.pos + at
if index >= lex.source.src.len: result = EndOfFile
else: result = lex.source.src[index]
proc peek(lex: YamlLexer[BaseLexer], at: int = 1): char {.inline.} =
lex.source.buf[lex.source.bufpos + at]
# lexer states # lexer states
proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool
@ -60,10 +77,14 @@ proc tagShorthand[T](lex: YamlLexer[T], t: var LexerToken): bool
proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool
proc unknownDirParams[T](lex: YamlLexer[T], t: var LexerToken): bool proc unknownDirParams[T](lex: YamlLexer[T], t: var LexerToken): bool
proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool
proc possibleDirectivesEnd[T](lex: YamlLexer[T], t: var LexerToken): bool
proc possibleDocumentEnd[T](lex: YamlLexer[T], t: var LexerToken): bool
proc afterSeqInd[T](lex: YamlLexer[T], t: var LexerToken): bool
proc blockStyle[T](lex: YamlLexer[T], t: var LexerToken): bool {.locks:0.} proc blockStyle[T](lex: YamlLexer[T], t: var LexerToken): bool {.locks:0.}
proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool
proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool
proc flowStyle[T](lex: YamlLexer[T], t: var LexerToken): bool proc blockScalarHeader[T](lex: YamlLexer[T], t: var LexerToken): bool
proc blockScalar[T](lex: YamlLexer[T], t: var LexerToken): bool
proc streamEnd[T](lex: YamlLexer[T], t: var LexerToken): bool proc streamEnd[T](lex: YamlLexer[T], t: var LexerToken): bool
# interface # interface
@ -81,10 +102,20 @@ proc newYamlLexer*(source: string, startAt: int = 0):
proc init*[T](lex: YamlLexer[T]) = proc init*[T](lex: YamlLexer[T]) =
lex.nextImpl = outsideDoc[T] lex.nextImpl = outsideDoc[T]
lex.lineStartImpl = outsideDoc[T]
lex.inlineImpl = blockStyleInline[T]
proc next*(lex: YamlLexer): LexerToken = proc next*(lex: YamlLexer): LexerToken =
while not lex.nextImpl(lex, result): discard while not lex.nextImpl(lex, result): discard
proc setFlow*[T](lex: YamlLexer[T], value: bool) =
lex.inFlow = value
proc endBlockScalar*[T](lex: YamlLexer[T]) =
assert lex.nextImpl == blockScalar[T], "Expected blockScalar, got " & lex.nextImpl.repr
lex.inlineImpl = blockStyleInline[T]
lex.nextImpl = blockStyleInline[T]
# implementation # implementation
const const
@ -98,6 +129,8 @@ const
UTF8NonBreakingSpace = toUTF8(0xA0.Rune) UTF8NonBreakingSpace = toUTF8(0xA0.Rune)
UTF8LineSeparator = toUTF8(0x2028.Rune) UTF8LineSeparator = toUTF8(0x2028.Rune)
UTF8ParagraphSeparator = toUTF8(0x2029.Rune) UTF8ParagraphSeparator = toUTF8(0x2029.Rune)
UnknownIndentation = int.low
template debug(message: string) {.dirty.} = template debug(message: string) {.dirty.} =
when defined(yamlDebug): when defined(yamlDebug):
@ -146,7 +179,7 @@ template currentLine(lex: YamlLexer[StringSource]): string =
while lex.source.src[i] notin lineEnd: while lex.source.src[i] notin lineEnd:
result.add(lex.source.src[i]) result.add(lex.source.src[i])
inc(i) inc(i)
result.add("\n" & spaces(lex.columnNumber) & "^\n") result.add("\n" & spaces(lex.columnNumber - 1) & "^\n")
result result
proc generateError(lex: YamlLexer, message: string): proc generateError(lex: YamlLexer, message: string):
@ -183,7 +216,6 @@ proc yamlVersion[T](lex: YamlLexer[T], t: var LexerToken): bool =
raise lex.generateError("Invalid YAML version number") raise lex.generateError("Invalid YAML version number")
t = ltYamlVersion t = ltYamlVersion
result = true result = true
lex.stored = outsideDoc[T]
lex.nextImpl = expectLineEnd[T] lex.nextImpl = expectLineEnd[T]
template nextIsPlainSafe(lex: YamlLexer[BaseLexer], inFlow: bool): bool = template nextIsPlainSafe(lex: YamlLexer[BaseLexer], inFlow: bool): bool =
@ -219,6 +251,8 @@ proc tagShorthand[T](lex: YamlLexer[T], t: var LexerToken): bool =
lex.advance() lex.advance()
if lex.c notin spaceOrLineEnd: if lex.c notin spaceOrLineEnd:
raise lex.generateError("Missing space after tag shorthand") raise lex.generateError("Missing space after tag shorthand")
t = ltTagShorthand
result = true
lex.nextImpl = tagUri[T] lex.nextImpl = tagUri[T]
proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool = proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool =
@ -236,12 +270,18 @@ proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool =
lex.advance() lex.advance()
else: raise lex.generateError("Invalid character in tag uri: " & else: raise lex.generateError("Invalid character in tag uri: " &
escape("" & lex.c)) escape("" & lex.c))
t = ltTagUri
result = true
lex.nextImpl = expectLineEnd[T]
proc unknownDirParams[T](lex: YamlLexer[T], t: var LexerToken): bool = proc unknownDirParams[T](lex: YamlLexer[T], t: var LexerToken): bool =
while lex.c notin lineEnd + {'#'}: lex.advance() debug("lex: unknownDirParams")
while lex.c in space: lex.advance()
while lex.c notin lineEnd + {'#'}:
lex.buf.add(lex.c)
lex.advance()
t = ltUnknownDirectiveParams t = ltUnknownDirectiveParams
result = true result = true
lex.stored = outsideDoc[T]
lex.nextImpl = expectLineEnd[T] lex.nextImpl = expectLineEnd[T]
proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool = proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
@ -258,17 +298,19 @@ proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
break break
of '\l': of '\l':
lex.lexLF() lex.lexLF()
lex.nextImpl = lex.stored lex.nextImpl = lex.lineStartImpl
break break
of '\c': of '\c':
lex.lexCR() lex.lexCR()
lex.nextImpl = lex.stored lex.nextImpl = lex.lineStartImpl
break break
else: else:
raise lex.generateError("Unexpected character (expected line end): " & raise lex.generateError("Unexpected character (expected line end): " &
escape("" & lex.c)) escape("" & lex.c))
proc possibleDirectivesEnd[T](lex: YamlLexer[T], t: var LexerToken) = proc possibleDirectivesEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: possibleDirectivesEnd")
lex.lineStartImpl = blockStyle[T]
lex.advance() lex.advance()
if lex.c == '-': if lex.c == '-':
lex.advance() lex.advance()
@ -277,20 +319,26 @@ proc possibleDirectivesEnd[T](lex: YamlLexer[T], t: var LexerToken) =
if lex.c in spaceOrLineEnd: if lex.c in spaceOrLineEnd:
t = ltDirectivesEnd t = ltDirectivesEnd
lex.nextImpl = blockStyleInline[T] lex.nextImpl = blockStyleInline[T]
return return true
lex.buf.add('-') lex.buf.add('-')
lex.buf.add('-') lex.buf.add('-')
elif lex.c in spaceOrLineEnd: elif lex.c in spaceOrLineEnd:
lex.advance() lex.indentation = 0
t = ltSeqItemInd t = ltIndentation
lex.nextImpl = blockStyleInline[T] lex.nextImpl = afterSeqInd[T]
return return true
lex.buf.add('-') lex.buf.add('-')
lex.nextImpl = plainScalarPart[T] lex.nextImpl = plainScalarPart[T]
lex.indentation = 0 result = false
t = ltIndentation
proc possibleDocumentEnd[T](lex: YamlLexer[T], t: var LexerToken) = proc afterSeqInd[T](lex: YamlLexer[T], t: var LexerToken): bool =
result = true
t = ltSeqItemInd
if lex.c notin lineEnd: lex.advance()
lex.nextImpl = blockStyleInline[T]
proc possibleDocumentEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: possibleDocumentEnd")
lex.advance() lex.advance()
if lex.c == '.': if lex.c == '.':
lex.advance() lex.advance()
@ -299,16 +347,16 @@ proc possibleDocumentEnd[T](lex: YamlLexer[T], t: var LexerToken) =
if lex.c in spaceOrLineEnd: if lex.c in spaceOrLineEnd:
t = ltDocumentEnd t = ltDocumentEnd
lex.nextImpl = expectLineEnd[T] lex.nextImpl = expectLineEnd[T]
lex.stored = outsideDoc[T] lex.lineStartImpl = outsideDoc[T]
return return true
lex.buf.add('.') lex.buf.add('.')
lex.buf.add('.') lex.buf.add('.')
lex.buf.add('.') lex.buf.add('.')
lex.nextImpl = plainScalarPart[T] lex.nextImpl = plainScalarPart[T]
lex.indentation = 0 result = false
t = ltIndentation
proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool = proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: outsideDoc")
case lex.c case lex.c
of '%': of '%':
lex.advance() lex.advance()
@ -316,8 +364,10 @@ proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool =
case lex.buf case lex.buf
of "YAML": of "YAML":
t = ltYamlDirective t = ltYamlDirective
lex.buf.setLen(0)
lex.nextImpl = yamlVersion[T] lex.nextImpl = yamlVersion[T]
of "TAG": of "TAG":
lex.buf.setLen(0)
t = ltTagDirective t = ltTagDirective
lex.nextImpl = tagShorthand[T] lex.nextImpl = tagShorthand[T]
else: else:
@ -325,11 +375,11 @@ proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool =
lex.nextImpl = unknownDirParams[T] lex.nextImpl = unknownDirParams[T]
return true return true
of '-': of '-':
lex.possibleDirectivesEnd(t) lex.nextImpl = possibleDirectivesEnd[T]
return true return false
of '.': of '.':
lex.possibleDocumentEnd(t) lex.indentation = 0
return true lex.nextImpl = possibleDocumentEnd[T]
of spaceOrLineEnd + {'#'}: of spaceOrLineEnd + {'#'}:
lex.indentation = 0 lex.indentation = 0
while lex.c == ' ': while lex.c == ' ':
@ -337,33 +387,34 @@ proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool =
lex.advance() lex.advance()
if lex.c in spaceOrLineEnd + {'#'}: if lex.c in spaceOrLineEnd + {'#'}:
lex.nextImpl = expectLineEnd[T] lex.nextImpl = expectLineEnd[T]
lex.stored = outsideDoc[T]
return false return false
else: lex.indentation = 0 lex.nextImpl = blockStyleInline[T]
lex.nextImpl = blockStyleInline[T] else:
lex.indentation = 0
lex.nextImpl = blockStyleInline[T]
lex.lineStartImpl = blockStyle[T]
t = ltIndentation t = ltIndentation
result = true result = true
proc blockStyle[T](lex: YamlLexer[T], t: var LexerToken): bool = proc blockStyle[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: blockStyle")
lex.indentation = 0
case lex.c case lex.c
of '-': of '-':
lex.possibleDirectivesEnd(t) lex.nextImpl = possibleDirectivesEnd[T]
return true return false
of '.': of '.': lex.nextImpl = possibleDocumentEnd[T]
lex.possibleDocumentEnd(t) of spaceOrLineEnd:
return true
of spaceOrLineEnd + {'#'}:
lex.indentation = 0
while lex.c == ' ': while lex.c == ' ':
lex.indentation.inc() lex.indentation.inc()
lex.advance() lex.advance()
if lex.c in spaceOrLineEnd + {'#'}: if lex.c in spaceOrLineEnd:
lex.nextImpl = expectLineEnd[T]
lex.stored = blockStyle[T]
t = ltEmptyLine t = ltEmptyLine
lex.nextImpl = expectLineEnd[T]
return true return true
else: lex.indentation = 0 else:
lex.nextImpl = blockStyleInline[T] lex.nextImpl = lex.inlineImpl
else: lex.nextImpl = lex.inlineImpl
t = ltIndentation t = ltIndentation
result = true result = true
@ -371,7 +422,6 @@ proc possibleIndicatorChar[T](lex: YamlLexer[T], indicator: LexerToken,
t: var LexerToken): bool = t: var LexerToken): bool =
if lex.nextIsPlainSafe(false): if lex.nextIsPlainSafe(false):
lex.nextImpl = plainScalarPart[T] lex.nextImpl = plainScalarPart[T]
lex.stored = blockStyleInline[T]
result = false result = false
else: else:
t = indicator t = indicator
@ -380,7 +430,6 @@ proc possibleIndicatorChar[T](lex: YamlLexer[T], indicator: LexerToken,
while lex.c in space: lex.advance() while lex.c in space: lex.advance()
if lex.c in lineEnd: if lex.c in lineEnd:
lex.nextImpl = expectLineEnd[T] lex.nextImpl = expectLineEnd[T]
lex.stored = blockStyle[T]
proc flowIndicator[T](lex: YamlLexer[T], indicator: LexerToken, proc flowIndicator[T](lex: YamlLexer[T], indicator: LexerToken,
t: var LexerToken, inFlow: static[bool]): bool {.inline.} = t: var LexerToken, inFlow: static[bool]): bool {.inline.} =
@ -389,12 +438,9 @@ proc flowIndicator[T](lex: YamlLexer[T], indicator: LexerToken,
while lex.c in space: lex.advance() while lex.c in space: lex.advance()
if lex.c in lineEnd: if lex.c in lineEnd:
lex.nextImpl = expectLineEnd[T] lex.nextImpl = expectLineEnd[T]
when inFlow: lex.stored = flowStyle[T]
else: lex.stored = blockStyle[T]
proc addMultiple(s: var string, c: char, num: int) {.raises: [], inline.} = proc addMultiple(s: var string, c: char, num: int) {.raises: [], inline.} =
for i in 1..num: for i in 1..num: s.add(c)
s.add(c)
proc processQuotedWhitespace(lex: YamlLexer, newlines: var int) = proc processQuotedWhitespace(lex: YamlLexer, newlines: var int) =
block outer: block outer:
@ -515,6 +561,7 @@ proc doubleQuotedScalar[T](lex: YamlLexer[T]) =
lex.advance() lex.advance()
proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool = proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: blockStyleInline")
case lex.c case lex.c
of ':': result = lex.possibleIndicatorChar(ltMapValInd, t) of ':': result = lex.possibleIndicatorChar(ltMapValInd, t)
of '?': result = lex.possibleIndicatorChar(ltMapKeyInd, t) of '?': result = lex.possibleIndicatorChar(ltMapKeyInd, t)
@ -522,7 +569,6 @@ proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool =
of lineEnd + {'#'}: of lineEnd + {'#'}:
result = false result = false
lex.nextImpl = expectLineEnd[T] lex.nextImpl = expectLineEnd[T]
lex.stored = blockStyle[T]
of '\"': of '\"':
lex.doubleQuotedScalar() lex.doubleQuotedScalar()
t = ltQuotedScalar t = ltQuotedScalar
@ -532,19 +578,19 @@ proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool =
t = ltQuotedScalar t = ltQuotedScalar
result = true result = true
of '>', '|': of '>', '|':
# TODO if lex.inFlow: lex.nextImpl = plainScalarPart[T]
result = true else: lex.nextImpl = blockScalarHeader[T]
result = false
of '{': result = lex.flowIndicator(ltBraceOpen, t, false) of '{': result = lex.flowIndicator(ltBraceOpen, t, false)
of '}': result = lex.flowIndicator(ltBraceClose, t, false) of '}': result = lex.flowIndicator(ltBraceClose, t, false)
of '[': result = lex.flowIndicator(ltBracketOpen, t, false) of '[': result = lex.flowIndicator(ltBracketOpen, t, false)
of ']': result = lex.flowIndicator(ltBracketClose, t, false) of ']': result = lex.flowIndicator(ltBracketClose, t, false)
else: else:
lex.nextImpl = plainScalarPart[T] lex.nextImpl = plainScalarPart[T]
lex.stored = blockStyleInline[T]
result = false result = false
proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool = proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: plainScalar") debug("lex: plainScalarPart")
block outer: block outer:
while true: while true:
lex.buf.add(lex.c) lex.buf.add(lex.c)
@ -559,18 +605,17 @@ proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool =
of lineEnd + {'#'}: of lineEnd + {'#'}:
lex.buf.setLen(lenBeforeSpace) lex.buf.setLen(lenBeforeSpace)
lex.nextImpl = expectLineEnd[T] lex.nextImpl = expectLineEnd[T]
lex.stored = if lex.inFlow: flowStyle[T] else: blockStyle[T]
break outer break outer
of ':': of ':':
if lex.nextIsPlainSafe(lex.inFlow): break if lex.nextIsPlainSafe(lex.inFlow): break
else: else:
lex.buf.setLen(lenBeforeSpace) lex.buf.setLen(lenBeforeSpace)
lex.nextImpl = lex.stored lex.nextImpl = blockStyleInline[T]
break outer break outer
of flowIndicators: of flowIndicators:
if lex.inFlow: if lex.inFlow:
lex.buf.setLen(lenBeforeSpace) lex.buf.setLen(lenBeforeSpace)
lex.nextImpl = lex.stored lex.nextImpl = blockStyleInline[T]
break outer break outer
else: else:
lex.buf.add(lex.c) lex.buf.add(lex.c)
@ -580,11 +625,10 @@ proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool =
else: break else: break
of lineEnd: of lineEnd:
lex.nextImpl = expectLineEnd[T] lex.nextImpl = expectLineEnd[T]
lex.stored = if lex.inFlow: flowStyle[T] else: blockStyle[T]
break break
of flowIndicators: of flowIndicators:
if lex.inFlow: if lex.inFlow:
lex.nextImpl = lex.stored lex.nextImpl = blockStyleInline[T]
break break
of ':': of ':':
if not lex.nextIsPlainSafe(lex.inFlow): if not lex.nextIsPlainSafe(lex.inFlow):
@ -594,9 +638,58 @@ proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool =
t = ltScalarPart t = ltScalarPart
result = true result = true
proc flowStyle[T](lex: YamlLexer[T], t: var LexerToken): bool = proc blockScalarHeader[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: blockScalarHeader")
lex.chomp = ctClip
lex.blockScalarIndent = UnknownIndentation
lex.folded = lex.c == '>'
while true:
lex.advance()
case lex.c
of '+':
if lex.chomp != ctClip:
raise lex.generateError("Only one chomping indicator is allowed")
lex.chomp = ctKeep
of '-':
if lex.chomp != ctClip:
raise lex.generateError("Only one chomping indicator is allowed")
lex.chomp = ctStrip
of '1'..'9':
if lex.blockScalarIndent != UnknownIndentation:
raise lex.generateError("Only one indentation indicator is allowed")
lex.blockScalarIndent = lex.indentation + ord(lex.c) - ord('\x30')
of spaceOrLineEnd: break
else:
raise lex.generateError(
"Illegal character in block scalar header: '" & escape("" & lex.c) &
'\'')
lex.nextImpl = expectLineEnd[T]
lex.inlineImpl = blockScalar[T]
t = ltBlockScalarHeader
result = true
proc blockScalar[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: blockScalarLine")
result = false result = false
if lex.blockScalarIndent == UnknownIndentation:
lex.blockScalarIndent = lex.indentation
elif lex.c == '#':
lex.nextImpl = expectLineEnd[T]
return false
elif lex.indentation < lex.blockScalarIndent:
raise lex.generateError("Too little indentation in block scalar")
elif lex.indentation > lex.blockScalarIndent or lex.c == '\t':
lex.moreIndented = true
lex.buf.addMultiple(' ', lex.indentation - lex.blockScalarIndent)
else: lex.moreIndented = false
while lex.c notin lineEnd:
lex.buf.add(lex.c)
lex.advance()
t = ltScalarPart
result = true
lex.nextImpl = expectLineEnd[T]
proc streamEnd[T](lex: YamlLexer[T], t: var LexerToken): bool = proc streamEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: streamEnd")
t = ltStreamEnd t = ltStreamEnd
result = true result = true

View File

@ -2,7 +2,9 @@ import ../private/lex
import unittest, strutils import unittest, strutils
const tokensWithValue = [ltScalarPart, ltQuotedScalar] const tokensWithValue =
[ltScalarPart, ltQuotedScalar, ltYamlVersion, ltTagShorthand, ltTagUri,
ltUnknownDirective, ltUnknownDirectiveParams]
type type
TokenWithValue = object TokenWithValue = object
@ -11,26 +13,60 @@ type
value: string value: string
of ltIndentation: of ltIndentation:
indentation: int indentation: int
of ltBlockScalarHeader:
folded: bool
chomp: ChompType
else: discard else: discard
proc actualRepr(lex: YamlLexer, t: LexerToken): string =
result = $t
case t
of tokensWithValue:
result.add("(" & escape(lex.buf) & ")")
of ltIndentation:
result.add("(" & $lex.indentation & ")")
of ltBlockScalarHeader:
result.add("(" & $lex.folded & ", " & $lex.chomp & ")")
else: discard
proc assertEquals(input: string, expected: varargs[TokenWithValue]) = proc assertEquals(input: string, expected: varargs[TokenWithValue]) =
let lex = newYamlLexer(input) let lex = newYamlLexer(input)
lex.init() lex.init()
var i = 0
var blockScalarEnd = -1
for expectedToken in expected: for expectedToken in expected:
let t = lex.next() inc(i)
doAssert t == expectedToken.kind, "Wrong token kind: Expected " & try:
$expectedToken.kind & ", got " & $t let t = lex.next()
case expectedToken.kind doAssert t == expectedToken.kind, "Wrong token kind at #" & $i &
of tokensWithValue: ": Expected " & $expectedToken.kind & ", got " & lex.actualRepr(t)
doAssert lex.buf == expectedToken.value, case expectedToken.kind
"Wrong token content: Expected " & escape(expectedToken.value) & of tokensWithValue:
", got " & escape(lex.buf) doAssert lex.buf == expectedToken.value, "Wrong token content at #" &
lex.buf = "" $i & ": Expected " & escape(expectedToken.value) &
of ltIndentation: ", got " & escape(lex.buf)
doAssert lex.indentation == expectedToken.indentation, lex.buf = ""
"Wrong indentation length: Expected " & $expectedToken.indentation & of ltIndentation:
", got " & $lex.indentation doAssert lex.indentation == expectedToken.indentation,
else: discard "Wrong indentation length at #" & $i & ": Expected " &
$expectedToken.indentation & ", got " & $lex.indentation
if lex.indentation <= blockScalarEnd:
lex.endBlockScalar()
blockScalarEnd = -1
of ltBlockScalarHeader:
doAssert lex.folded == expectedToken.folded,
"Wrong folded indicator at #" & $i & ": Expected " &
$expectedToken.folded & ", got " & $lex.folded
doAssert lex.chomp == expectedToken.chomp,
"Wrong chomp indicator at #" & $i & ": Expected " &
$expectedToken.chomp & ", got " & $lex.chomp
blockScalarEnd = lex.indentation
else: discard
except YamlLexerError:
let e = (ref YamlLexerError)(getCurrentException())
echo "Error at line " & $e.line & ", column " & $e.column & ":"
echo e.lineContent
assert false
proc i(indent: int): TokenWithValue = proc i(indent: int): TokenWithValue =
TokenWithValue(kind: ltIndentation, indentation: indent) TokenWithValue(kind: ltIndentation, indentation: indent)
@ -41,6 +77,24 @@ proc qs(v: string): TokenWithValue =
proc se(): TokenWithValue = TokenWithValue(kind: ltStreamEnd) proc se(): TokenWithValue = TokenWithValue(kind: ltStreamEnd)
proc mk(): TokenWithValue = TokenWithValue(kind: ltMapKeyInd) proc mk(): TokenWithValue = TokenWithValue(kind: ltMapKeyInd)
proc mv(): TokenWithValue = TokenWithValue(kind: ltMapValInd) proc mv(): TokenWithValue = TokenWithValue(kind: ltMapValInd)
proc si(): TokenWithValue = TokenWithValue(kind: ltSeqItemInd)
proc dy(): TokenWithValue = TokenWithValue(kind: ltYamlDirective)
proc dt(): TokenWithValue = TokenWithValue(kind: ltTagDirective)
proc du(v: string): TokenWithValue =
TokenWithValue(kind: ltUnknownDirective, value: v)
proc dp(v: string): TokenWithValue =
TokenWithValue(kind: ltUnknownDirectiveParams, value: v)
proc yv(v: string): TokenWithValue =
TokenWithValue(kind: ltYamlVersion, value: v)
proc ts(v: string): TokenWithValue =
TokenWithValue(kind: ltTagShorthand, value: v)
proc tu(v: string): TokenWithValue =
TokenWithValue(kind: ltTagUri, value: v)
proc dirE(): TokenWithValue = TokenWithValue(kind: ltDirectivesEnd)
proc docE(): TokenWithValue = TokenWithValue(kind: ltDocumentEnd)
proc bs(folded: bool, chomp: ChompType): TokenWithValue =
TokenWithValue(kind: ltBlockScalarHeader, folded: folded, chomp: chomp)
proc el(): TokenWithValue = TokenWithValue(kind: ltEmptyLine)
suite "Lexer": suite "Lexer":
test "Empty document": test "Empty document":
@ -64,6 +118,9 @@ suite "Lexer":
assertEquals("? key\n: value", i(0), mk(), sp("key"), i(0), mv(), assertEquals("? key\n: value", i(0), mk(), sp("key"), i(0), mv(),
sp("value"), se()) sp("value"), se())
test "Sequence":
assertEquals("- a\n- b", i(0), si(), sp("a"), i(0), si(), sp("b"), se())
test "Single-line single-quoted scalar": test "Single-line single-quoted scalar":
assertEquals("'quoted scalar'", i(0), qs("quoted scalar"), se()) assertEquals("'quoted scalar'", i(0), qs("quoted scalar"), se())
@ -79,4 +136,23 @@ suite "Lexer":
qs("quoted multi line\lscalar"), se()) qs("quoted multi line\lscalar"), se())
test "Escape sequences": test "Escape sequences":
assertEquals(""""\n\x31\u0032\U00000033"""", i(0), qs("\l123"), se()) assertEquals(""""\n\x31\u0032\U00000033"""", i(0), qs("\l123"), se())
test "Directives":
assertEquals("%YAML 1.2\n---\n%TAG\n...\n\n%TAG ! example.html",
dy(), yv("1.2"), dirE(), i(0), sp("%TAG"), i(0), docE(), dt(),
ts("!"), tu("example.html"), se())
test "Markers and Unknown Directive":
assertEquals("---\n---\n...\n%UNKNOWN warbl", dirE(), dirE(), i(0),
docE(), du("UNKNOWN"), dp("warbl"), se())
test "Block scalar":
assertEquals("|\l a\l\l b\l # comment", i(0), bs(false, ctClip), i(2),
sp("a"), el(), i(2), sp("b"), i(1), se())
test "Block Scalars":
assertEquals("one : >2-\l foo\l bar\ltwo: |+\l bar\l baz", i(0),
sp("one"), mv(), bs(true, ctStrip), i(3), sp(" foo"), i(2), sp("bar"),
i(0), sp("two"), mv(), bs(false, ctKeep), i(1), sp("bar"), i(2),
sp(" baz"), se())