mirror of https://github.com/status-im/NimYAML.git

lexer: worked on block scalars and stuff
commit ba02b41555, parent 79f432a27d
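In summary, this change replaces the lexer's single `stored` continuation state with dedicated `lineStartImpl`/`inlineImpl` states, renames `ltTagUrl` to `ltTagUri`, adds a `ChompType` enum and an `ltBlockScalarHeader` token, implements the `blockScalarHeader` and `blockScalar` states (plus `afterSeqInd` and state-based `possibleDirectivesEnd`/`possibleDocumentEnd`), and extends test/tlex.nim. For orientation, the indicator handling added in `blockScalarHeader` boils down to the following standalone sketch; the types are simplified, `parseHeader` is an illustrative name, and the duplicate-indicator checks of the real code are omitted (in the lexer, the digit is added to the current indentation to obtain `blockScalarIndent`):

# Sketch only -- not the lexer code itself.
type ChompType = enum
  ctKeep, ctClip, ctStrip

proc parseHeader(header: string): tuple[folded: bool, chomp: ChompType, indent: int] =
  ## header is the indicator text, e.g. "|", ">2-" or "|+".
  result = (folded: header[0] == '>', chomp: ctClip, indent: -1)
  for c in header[1..^1]:
    case c
    of '+': result.chomp = ctKeep                    # keep trailing newlines
    of '-': result.chomp = ctStrip                   # strip trailing newlines
    of '1'..'9': result.indent = ord(c) - ord('0')   # explicit indentation indicator
    else: raise newException(ValueError, "illegal character in block scalar header: " & c)

when isMainModule:
  doAssert parseHeader(">2-") == (folded: true, chomp: ctStrip, indent: 2)
  doAssert parseHeader("|+") == (folded: false, chomp: ctKeep, indent: -1)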
private/lex.nim | 215

@@ -5,6 +5,9 @@
 # distribution, for details about the copyright.

 import lexbase, streams, strutils, unicode
+when defined(yamlDebug):
+  import terminal
+  export terminal

 type
   StringSource* = object
@@ -22,24 +25,30 @@ type
     source: T
     inFlow: bool
     literalEndIndent: int
-    nextImpl, stored: LexerState[T]
+    nextImpl, lineStartImpl, inlineImpl: LexerState[T]
     buf*: string not nil
     indentation*: int
+    blockScalarIndent: int
+    moreIndented*, folded*: bool
+    chomp*: ChompType
     c: char

   LexerState[T] = proc(lex: YamlLexer[T], t: var LexerToken): bool

   LexerToken* = enum
     ltYamlDirective, ltYamlVersion, ltTagDirective, ltTagShorthand,
-    ltTagUrl, ltUnknownDirective, ltUnknownDirectiveParams,
+    ltTagUri, ltUnknownDirective, ltUnknownDirectiveParams, ltEmptyLine,
     ltDirectivesEnd, ltDocumentEnd, ltStreamEnd, ltIndentation, ltQuotedScalar,
-    ltScalarPart, ltEmptyLine, ltSeqItemInd, ltMapKeyInd, ltMapValInd,
+    ltScalarPart, ltBlockScalarHeader, ltSeqItemInd, ltMapKeyInd, ltMapValInd,
     ltBraceOpen, ltBraceClose, ltBracketOpen, ltBracketClose, ltComma,
     ltLiteralTag, ltTagSuffix, ltAnchor, ltAlias

   YamlLexerError* = object of Exception
-    line, column: int
-    lineContent: string
+    line*, column*: int
+    lineContent*: string

+  ChompType* = enum
+    ctKeep, ctClip, ctStrip
+
 # templates

@@ -52,6 +61,14 @@ proc advance(lex: YamlLexer[StringSource], step: int = 1) {.inline.} =
   if lex.source.pos >= lex.source.src.len: lex.c = EndOfFile
   else: lex.c = lex.source.src[lex.source.pos]

+proc peek(lex: YamlLexer[StringSource], at: int = 1): char {.inline.} =
+  let index = lex.source.pos + at
+  if index >= lex.source.src.len: result = EndOfFile
+  else: result = lex.source.src[index]
+
+proc peek(lex: YamlLexer[BaseLexer], at: int = 1): char {.inline.} =
+  lex.source.buf[lex.source.bufpos + at]
+
 # lexer states

 proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool
@@ -60,10 +77,14 @@ proc tagShorthand[T](lex: YamlLexer[T], t: var LexerToken): bool
 proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool
 proc unknownDirParams[T](lex: YamlLexer[T], t: var LexerToken): bool
 proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool
+proc possibleDirectivesEnd[T](lex: YamlLexer[T], t: var LexerToken): bool
+proc possibleDocumentEnd[T](lex: YamlLexer[T], t: var LexerToken): bool
+proc afterSeqInd[T](lex: YamlLexer[T], t: var LexerToken): bool
 proc blockStyle[T](lex: YamlLexer[T], t: var LexerToken): bool {.locks:0.}
 proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool
 proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool
-proc flowStyle[T](lex: YamlLexer[T], t: var LexerToken): bool
+proc blockScalarHeader[T](lex: YamlLexer[T], t: var LexerToken): bool
+proc blockScalar[T](lex: YamlLexer[T], t: var LexerToken): bool
 proc streamEnd[T](lex: YamlLexer[T], t: var LexerToken): bool

 # interface

@@ -81,10 +102,20 @@ proc newYamlLexer*(source: string, startAt: int = 0):

 proc init*[T](lex: YamlLexer[T]) =
   lex.nextImpl = outsideDoc[T]
+  lex.lineStartImpl = outsideDoc[T]
+  lex.inlineImpl = blockStyleInline[T]

 proc next*(lex: YamlLexer): LexerToken =
   while not lex.nextImpl(lex, result): discard

+proc setFlow*[T](lex: YamlLexer[T], value: bool) =
+  lex.inFlow = value
+
+proc endBlockScalar*[T](lex: YamlLexer[T]) =
+  assert lex.nextImpl == blockScalar[T], "Expected blockScalar, got " & lex.nextImpl.repr
+  lex.inlineImpl = blockStyleInline[T]
+  lex.nextImpl = blockStyleInline[T]
+
 # implementation

 const
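As a usage note for the interface hunk above: a consumer constructs the lexer, calls init, and then pulls tokens with next() until ltStreamEnd, reading buf and indentation between calls. A minimal sketch, assuming the string-based newYamlLexer shown in the hunk header and the relative import used by test/tlex.nim; this is not code from the diff:

import ../private/lex   # path as in test/tlex.nim; adjust to your layout

when isMainModule:
  let lex = newYamlLexer("key: value")
  lex.init()
  while true:
    let t = lex.next()
    echo t, " buf=", lex.buf, " indentation=", lex.indentation
    lex.buf.setLen(0)          # value-carrying tokens leave their text in buf
    if t == ltStreamEnd: break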
@@ -98,6 +129,8 @@ const
   UTF8NonBreakingSpace = toUTF8(0xA0.Rune)
   UTF8LineSeparator = toUTF8(0x2028.Rune)
   UTF8ParagraphSeparator = toUTF8(0x2029.Rune)

+  UnknownIndentation = int.low
+
 template debug(message: string) {.dirty.} =
   when defined(yamlDebug):
@@ -146,7 +179,7 @@ template currentLine(lex: YamlLexer[StringSource]): string =
   while lex.source.src[i] notin lineEnd:
     result.add(lex.source.src[i])
     inc(i)
-  result.add("\n" & spaces(lex.columnNumber) & "^\n")
+  result.add("\n" & spaces(lex.columnNumber - 1) & "^\n")
   result

 proc generateError(lex: YamlLexer, message: string):
@@ -183,7 +216,6 @@ proc yamlVersion[T](lex: YamlLexer[T], t: var LexerToken): bool =
     raise lex.generateError("Invalid YAML version number")
   t = ltYamlVersion
   result = true
-  lex.stored = outsideDoc[T]
   lex.nextImpl = expectLineEnd[T]

 template nextIsPlainSafe(lex: YamlLexer[BaseLexer], inFlow: bool): bool =
@@ -219,6 +251,8 @@ proc tagShorthand[T](lex: YamlLexer[T], t: var LexerToken): bool =
     lex.advance()
   if lex.c notin spaceOrLineEnd:
     raise lex.generateError("Missing space after tag shorthand")
+  t = ltTagShorthand
+  result = true
   lex.nextImpl = tagUri[T]

 proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool =
@@ -236,12 +270,18 @@ proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool =
       lex.advance()
     else: raise lex.generateError("Invalid character in tag uri: " &
         escape("" & lex.c))
+  t = ltTagUri
+  result = true
+  lex.nextImpl = expectLineEnd[T]

 proc unknownDirParams[T](lex: YamlLexer[T], t: var LexerToken): bool =
-  while lex.c notin lineEnd + {'#'}: lex.advance()
+  debug("lex: unknownDirParams")
+  while lex.c in space: lex.advance()
+  while lex.c notin lineEnd + {'#'}:
+    lex.buf.add(lex.c)
+    lex.advance()
   t = ltUnknownDirectiveParams
   result = true
-  lex.stored = outsideDoc[T]
   lex.nextImpl = expectLineEnd[T]

 proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
@@ -258,17 +298,19 @@ proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
      break
    of '\l':
      lex.lexLF()
-      lex.nextImpl = lex.stored
+      lex.nextImpl = lex.lineStartImpl
      break
    of '\c':
      lex.lexCR()
-      lex.nextImpl = lex.stored
+      lex.nextImpl = lex.lineStartImpl
      break
    else:
      raise lex.generateError("Unexpected character (expected line end): " &
          escape("" & lex.c))

-proc possibleDirectivesEnd[T](lex: YamlLexer[T], t: var LexerToken) =
+proc possibleDirectivesEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
+  debug("lex: possibleDirectivesEnd")
+  lex.lineStartImpl = blockStyle[T]
   lex.advance()
   if lex.c == '-':
     lex.advance()
@@ -277,20 +319,26 @@ proc possibleDirectivesEnd[T](lex: YamlLexer[T], t: var LexerToken) =
     if lex.c in spaceOrLineEnd:
       t = ltDirectivesEnd
       lex.nextImpl = blockStyleInline[T]
-      return
+      return true
     lex.buf.add('-')
     lex.buf.add('-')
   elif lex.c in spaceOrLineEnd:
-    lex.advance()
-    t = ltSeqItemInd
-    lex.nextImpl = blockStyleInline[T]
-    return
+    lex.indentation = 0
+    t = ltIndentation
+    lex.nextImpl = afterSeqInd[T]
+    return true
   lex.buf.add('-')
   lex.nextImpl = plainScalarPart[T]
-  lex.indentation = 0
-  t = ltIndentation
+  result = false

-proc possibleDocumentEnd[T](lex: YamlLexer[T], t: var LexerToken) =
+proc afterSeqInd[T](lex: YamlLexer[T], t: var LexerToken): bool =
+  result = true
+  t = ltSeqItemInd
+  if lex.c notin lineEnd: lex.advance()
+  lex.nextImpl = blockStyleInline[T]
+
+proc possibleDocumentEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
+  debug("lex: possibleDocumentEnd")
   lex.advance()
   if lex.c == '.':
     lex.advance()
@@ -299,16 +347,16 @@ proc possibleDocumentEnd[T](lex: YamlLexer[T], t: var LexerToken) =
     if lex.c in spaceOrLineEnd:
       t = ltDocumentEnd
       lex.nextImpl = expectLineEnd[T]
-      lex.stored = outsideDoc[T]
-      return
+      lex.lineStartImpl = outsideDoc[T]
+      return true
     lex.buf.add('.')
     lex.buf.add('.')
   lex.buf.add('.')
   lex.nextImpl = plainScalarPart[T]
-  lex.indentation = 0
-  t = ltIndentation
+  result = false

 proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool =
+  debug("lex: outsideDoc")
   case lex.c
   of '%':
     lex.advance()
@@ -316,8 +364,10 @@ proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool =
     case lex.buf
     of "YAML":
       t = ltYamlDirective
+      lex.buf.setLen(0)
       lex.nextImpl = yamlVersion[T]
     of "TAG":
+      lex.buf.setLen(0)
       t = ltTagDirective
       lex.nextImpl = tagShorthand[T]
     else:
@@ -325,11 +375,11 @@ proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool =
       lex.nextImpl = unknownDirParams[T]
     return true
   of '-':
-    lex.possibleDirectivesEnd(t)
-    return true
+    lex.nextImpl = possibleDirectivesEnd[T]
+    return false
   of '.':
-    lex.possibleDocumentEnd(t)
-    return true
+    lex.indentation = 0
+    lex.nextImpl = possibleDocumentEnd[T]
   of spaceOrLineEnd + {'#'}:
     lex.indentation = 0
     while lex.c == ' ':
@@ -337,33 +387,34 @@ proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool =
       lex.advance()
     if lex.c in spaceOrLineEnd + {'#'}:
       lex.nextImpl = expectLineEnd[T]
-      lex.stored = outsideDoc[T]
       return false
-    else: lex.indentation = 0
-  lex.nextImpl = blockStyleInline[T]
+    lex.nextImpl = blockStyleInline[T]
+  else:
+    lex.indentation = 0
+    lex.nextImpl = blockStyleInline[T]
+    lex.lineStartImpl = blockStyle[T]
   t = ltIndentation
   result = true

 proc blockStyle[T](lex: YamlLexer[T], t: var LexerToken): bool =
+  debug("lex: blockStyle")
+  lex.indentation = 0
   case lex.c
   of '-':
-    lex.possibleDirectivesEnd(t)
-    return true
-  of '.':
-    lex.possibleDocumentEnd(t)
-    return true
-  of spaceOrLineEnd + {'#'}:
-    lex.indentation = 0
+    lex.nextImpl = possibleDirectivesEnd[T]
+    return false
+  of '.': lex.nextImpl = possibleDocumentEnd[T]
+  of spaceOrLineEnd:
     while lex.c == ' ':
       lex.indentation.inc()
       lex.advance()
-    if lex.c in spaceOrLineEnd + {'#'}:
-      lex.nextImpl = expectLineEnd[T]
-      lex.stored = blockStyle[T]
+    if lex.c in spaceOrLineEnd:
       t = ltEmptyLine
+      lex.nextImpl = expectLineEnd[T]
       return true
-    else: lex.indentation = 0
-  lex.nextImpl = blockStyleInline[T]
+    else:
+      lex.nextImpl = lex.inlineImpl
+  else: lex.nextImpl = lex.inlineImpl
   t = ltIndentation
   result = true

@@ -371,7 +422,6 @@ proc possibleIndicatorChar[T](lex: YamlLexer[T], indicator: LexerToken,
     t: var LexerToken): bool =
   if lex.nextIsPlainSafe(false):
     lex.nextImpl = plainScalarPart[T]
-    lex.stored = blockStyleInline[T]
     result = false
   else:
     t = indicator
@@ -380,7 +430,6 @@ proc possibleIndicatorChar[T](lex: YamlLexer[T], indicator: LexerToken,
     while lex.c in space: lex.advance()
     if lex.c in lineEnd:
       lex.nextImpl = expectLineEnd[T]
-      lex.stored = blockStyle[T]

 proc flowIndicator[T](lex: YamlLexer[T], indicator: LexerToken,
     t: var LexerToken, inFlow: static[bool]): bool {.inline.} =
@@ -389,12 +438,9 @@ proc flowIndicator[T](lex: YamlLexer[T], indicator: LexerToken,
   while lex.c in space: lex.advance()
   if lex.c in lineEnd:
     lex.nextImpl = expectLineEnd[T]
-    when inFlow: lex.stored = flowStyle[T]
-    else: lex.stored = blockStyle[T]

 proc addMultiple(s: var string, c: char, num: int) {.raises: [], inline.} =
-  for i in 1..num:
-    s.add(c)
+  for i in 1..num: s.add(c)

 proc processQuotedWhitespace(lex: YamlLexer, newlines: var int) =
   block outer:
@@ -515,6 +561,7 @@ proc doubleQuotedScalar[T](lex: YamlLexer[T]) =
     lex.advance()

 proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool =
+  debug("lex: blockStyleInline")
   case lex.c
   of ':': result = lex.possibleIndicatorChar(ltMapValInd, t)
   of '?': result = lex.possibleIndicatorChar(ltMapKeyInd, t)
@@ -522,7 +569,6 @@ proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool =
   of lineEnd + {'#'}:
     result = false
     lex.nextImpl = expectLineEnd[T]
-    lex.stored = blockStyle[T]
   of '\"':
     lex.doubleQuotedScalar()
     t = ltQuotedScalar
@@ -532,19 +578,19 @@ proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool =
     t = ltQuotedScalar
     result = true
   of '>', '|':
-    # TODO
-    result = true
+    if lex.inFlow: lex.nextImpl = plainScalarPart[T]
+    else: lex.nextImpl = blockScalarHeader[T]
+    result = false
   of '{': result = lex.flowIndicator(ltBraceOpen, t, false)
   of '}': result = lex.flowIndicator(ltBraceClose, t, false)
   of '[': result = lex.flowIndicator(ltBracketOpen, t, false)
   of ']': result = lex.flowIndicator(ltBracketClose, t, false)
   else:
     lex.nextImpl = plainScalarPart[T]
-    lex.stored = blockStyleInline[T]
     result = false

 proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool =
-  debug("lex: plainScalar")
+  debug("lex: plainScalarPart")
   block outer:
     while true:
       lex.buf.add(lex.c)
@@ -559,18 +605,17 @@ proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool =
         of lineEnd + {'#'}:
           lex.buf.setLen(lenBeforeSpace)
           lex.nextImpl = expectLineEnd[T]
-          lex.stored = if lex.inFlow: flowStyle[T] else: blockStyle[T]
           break outer
         of ':':
           if lex.nextIsPlainSafe(lex.inFlow): break
           else:
             lex.buf.setLen(lenBeforeSpace)
-            lex.nextImpl = lex.stored
+            lex.nextImpl = blockStyleInline[T]
             break outer
         of flowIndicators:
           if lex.inFlow:
             lex.buf.setLen(lenBeforeSpace)
-            lex.nextImpl = lex.stored
+            lex.nextImpl = blockStyleInline[T]
             break outer
           else:
             lex.buf.add(lex.c)
@@ -580,11 +625,10 @@ proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool =
        else: break
      of lineEnd:
        lex.nextImpl = expectLineEnd[T]
-        lex.stored = if lex.inFlow: flowStyle[T] else: blockStyle[T]
        break
      of flowIndicators:
        if lex.inFlow:
-          lex.nextImpl = lex.stored
+          lex.nextImpl = blockStyleInline[T]
          break
      of ':':
        if not lex.nextIsPlainSafe(lex.inFlow):
@@ -594,9 +638,58 @@ proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool =
     t = ltScalarPart
     result = true

-proc flowStyle[T](lex: YamlLexer[T], t: var LexerToken): bool =
+proc blockScalarHeader[T](lex: YamlLexer[T], t: var LexerToken): bool =
+  debug("lex: blockScalarHeader")
+  lex.chomp = ctClip
+  lex.blockScalarIndent = UnknownIndentation
+  lex.folded = lex.c == '>'
+  while true:
+    lex.advance()
+    case lex.c
+    of '+':
+      if lex.chomp != ctClip:
+        raise lex.generateError("Only one chomping indicator is allowed")
+      lex.chomp = ctKeep
+    of '-':
+      if lex.chomp != ctClip:
+        raise lex.generateError("Only one chomping indicator is allowed")
+      lex.chomp = ctStrip
+    of '1'..'9':
+      if lex.blockScalarIndent != UnknownIndentation:
+        raise lex.generateError("Only one indentation indicator is allowed")
+      lex.blockScalarIndent = lex.indentation + ord(lex.c) - ord('\x30')
+    of spaceOrLineEnd: break
+    else:
+      raise lex.generateError(
+          "Illegal character in block scalar header: '" & escape("" & lex.c) &
+          '\'')
+  lex.nextImpl = expectLineEnd[T]
+  lex.inlineImpl = blockScalar[T]
+  t = ltBlockScalarHeader
+  result = true
+
+proc blockScalar[T](lex: YamlLexer[T], t: var LexerToken): bool =
+  debug("lex: blockScalarLine")
   result = false
+  if lex.blockScalarIndent == UnknownIndentation:
+    lex.blockScalarIndent = lex.indentation
+  elif lex.c == '#':
+    lex.nextImpl = expectLineEnd[T]
+    return false
+  elif lex.indentation < lex.blockScalarIndent:
+    raise lex.generateError("Too little indentation in block scalar")
+  elif lex.indentation > lex.blockScalarIndent or lex.c == '\t':
+    lex.moreIndented = true
+    lex.buf.addMultiple(' ', lex.indentation - lex.blockScalarIndent)
+  else: lex.moreIndented = false
+  while lex.c notin lineEnd:
+    lex.buf.add(lex.c)
+    lex.advance()
+  t = ltScalarPart
+  result = true
+  lex.nextImpl = expectLineEnd[T]

 proc streamEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
+  debug("lex: streamEnd")
   t = ltStreamEnd
   result = true
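Taken together, the block scalar flow above works in two stages: blockScalarHeader emits ltBlockScalarHeader (setting folded, chomp and, if a digit indicator is present, blockScalarIndent) and points inlineImpl at blockScalar, which then emits one ltScalarPart per line; the lexer itself never terminates the scalar, the consumer does so by calling endBlockScalar() once an ltIndentation token reports indentation at or below the header's level. A consumer-side sketch of that handshake, following the pattern the updated test below uses (blockScalarEnd is a local illustrative name; this is not code from the diff):

import ../private/lex   # path as in test/tlex.nim; adjust to your layout

when isMainModule:
  let lex = newYamlLexer("a: |\l  x\lb: y")
  lex.init()
  var blockScalarEnd = -1
  while true:
    let t = lex.next()
    case t
    of ltBlockScalarHeader:
      blockScalarEnd = lex.indentation     # lex.folded / lex.chomp are valid here
    of ltIndentation:
      if blockScalarEnd >= 0 and lex.indentation <= blockScalarEnd:
        lex.endBlockScalar()               # hand lexing back to blockStyleInline
        blockScalarEnd = -1
    of ltStreamEnd: break
    else: discard
    lex.buf.setLen(0)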
test/tlex.nim | 108
@@ -2,7 +2,9 @@ import ../private/lex

 import unittest, strutils

-const tokensWithValue = [ltScalarPart, ltQuotedScalar]
+const tokensWithValue =
+  [ltScalarPart, ltQuotedScalar, ltYamlVersion, ltTagShorthand, ltTagUri,
+   ltUnknownDirective, ltUnknownDirectiveParams]

 type
   TokenWithValue = object
@@ -11,26 +13,60 @@ type
       value: string
     of ltIndentation:
       indentation: int
+    of ltBlockScalarHeader:
+      folded: bool
+      chomp: ChompType
     else: discard

+proc actualRepr(lex: YamlLexer, t: LexerToken): string =
+  result = $t
+  case t
+  of tokensWithValue:
+    result.add("(" & escape(lex.buf) & ")")
+  of ltIndentation:
+    result.add("(" & $lex.indentation & ")")
+  of ltBlockScalarHeader:
+    result.add("(" & $lex.folded & ", " & $lex.chomp & ")")
+  else: discard
+
 proc assertEquals(input: string, expected: varargs[TokenWithValue]) =
   let lex = newYamlLexer(input)
   lex.init()
+  var i = 0
+  var blockScalarEnd = -1
   for expectedToken in expected:
-    let t = lex.next()
-    doAssert t == expectedToken.kind, "Wrong token kind: Expected " &
-        $expectedToken.kind & ", got " & $t
-    case expectedToken.kind
-    of tokensWithValue:
-      doAssert lex.buf == expectedToken.value,
-          "Wrong token content: Expected " & escape(expectedToken.value) &
-          ", got " & escape(lex.buf)
-      lex.buf = ""
-    of ltIndentation:
-      doAssert lex.indentation == expectedToken.indentation,
-          "Wrong indentation length: Expected " & $expectedToken.indentation &
-          ", got " & $lex.indentation
-    else: discard
+    inc(i)
+    try:
+      let t = lex.next()
+      doAssert t == expectedToken.kind, "Wrong token kind at #" & $i &
+          ": Expected " & $expectedToken.kind & ", got " & lex.actualRepr(t)
+      case expectedToken.kind
+      of tokensWithValue:
+        doAssert lex.buf == expectedToken.value, "Wrong token content at #" &
+            $i & ": Expected " & escape(expectedToken.value) &
+            ", got " & escape(lex.buf)
+        lex.buf = ""
+      of ltIndentation:
+        doAssert lex.indentation == expectedToken.indentation,
+            "Wrong indentation length at #" & $i & ": Expected " &
+            $expectedToken.indentation & ", got " & $lex.indentation
+        if lex.indentation <= blockScalarEnd:
+          lex.endBlockScalar()
+          blockScalarEnd = -1
+      of ltBlockScalarHeader:
+        doAssert lex.folded == expectedToken.folded,
+            "Wrong folded indicator at #" & $i & ": Expected " &
+            $expectedToken.folded & ", got " & $lex.folded
+        doAssert lex.chomp == expectedToken.chomp,
+            "Wrong chomp indicator at #" & $i & ": Expected " &
+            $expectedToken.chomp & ", got " & $lex.chomp
+        blockScalarEnd = lex.indentation
+      else: discard
+    except YamlLexerError:
+      let e = (ref YamlLexerError)(getCurrentException())
+      echo "Error at line " & $e.line & ", column " & $e.column & ":"
+      echo e.lineContent
+      assert false

 proc i(indent: int): TokenWithValue =
   TokenWithValue(kind: ltIndentation, indentation: indent)
@@ -41,6 +77,24 @@ proc qs(v: string): TokenWithValue =
 proc se(): TokenWithValue = TokenWithValue(kind: ltStreamEnd)
 proc mk(): TokenWithValue = TokenWithValue(kind: ltMapKeyInd)
 proc mv(): TokenWithValue = TokenWithValue(kind: ltMapValInd)
+proc si(): TokenWithValue = TokenWithValue(kind: ltSeqItemInd)
+proc dy(): TokenWithValue = TokenWithValue(kind: ltYamlDirective)
+proc dt(): TokenWithValue = TokenWithValue(kind: ltTagDirective)
+proc du(v: string): TokenWithValue =
+  TokenWithValue(kind: ltUnknownDirective, value: v)
+proc dp(v: string): TokenWithValue =
+  TokenWithValue(kind: ltUnknownDirectiveParams, value: v)
+proc yv(v: string): TokenWithValue =
+  TokenWithValue(kind: ltYamlVersion, value: v)
+proc ts(v: string): TokenWithValue =
+  TokenWithValue(kind: ltTagShorthand, value: v)
+proc tu(v: string): TokenWithValue =
+  TokenWithValue(kind: ltTagUri, value: v)
+proc dirE(): TokenWithValue = TokenWithValue(kind: ltDirectivesEnd)
+proc docE(): TokenWithValue = TokenWithValue(kind: ltDocumentEnd)
+proc bs(folded: bool, chomp: ChompType): TokenWithValue =
+  TokenWithValue(kind: ltBlockScalarHeader, folded: folded, chomp: chomp)
+proc el(): TokenWithValue = TokenWithValue(kind: ltEmptyLine)

 suite "Lexer":
   test "Empty document":
@@ -64,6 +118,9 @@ suite "Lexer":
     assertEquals("? key\n: value", i(0), mk(), sp("key"), i(0), mv(),
         sp("value"), se())

+  test "Sequence":
+    assertEquals("- a\n- b", i(0), si(), sp("a"), i(0), si(), sp("b"), se())
+
   test "Single-line single-quoted scalar":
     assertEquals("'quoted scalar'", i(0), qs("quoted scalar"), se())

@@ -79,4 +136,23 @@ suite "Lexer":
         qs("quoted multi line\lscalar"), se())

   test "Escape sequences":
     assertEquals(""""\n\x31\u0032\U00000033"""", i(0), qs("\l123"), se())

+  test "Directives":
+    assertEquals("%YAML 1.2\n---\n%TAG\n...\n\n%TAG ! example.html",
+        dy(), yv("1.2"), dirE(), i(0), sp("%TAG"), i(0), docE(), dt(),
+        ts("!"), tu("example.html"), se())
+
+  test "Markers and Unknown Directive":
+    assertEquals("---\n---\n...\n%UNKNOWN warbl", dirE(), dirE(), i(0),
+        docE(), du("UNKNOWN"), dp("warbl"), se())
+
+  test "Block scalar":
+    assertEquals("|\l  a\l\l  b\l # comment", i(0), bs(false, ctClip), i(2),
+        sp("a"), el(), i(2), sp("b"), i(1), se())
+
+  test "Block Scalars":
+    assertEquals("one : >2-\l   foo\l  bar\ltwo: |+\l bar\l  baz", i(0),
+        sp("one"), mv(), bs(true, ctStrip), i(3), sp(" foo"), i(2), sp("bar"),
+        i(0), sp("two"), mv(), bs(false, ctKeep), i(1), sp("bar"), i(2),
+        sp(" baz"), se())