lexer: implemented quoted scalars
This commit is contained in:
parent 68a157e173
commit 79f432a27d

private/lex.nim | 150
@@ -4,7 +4,7 @@
 # See the file "copying.txt", included in this
 # distribution, for details about the copyright.
 
-import lexbase, streams, strutils
+import lexbase, streams, strutils, unicode
 
 type
   StringSource* = object
@@ -43,13 +43,14 @@ type
 
 # templates
 
-template advance(lex: YamlLexer[BaseLexer], step: int = 1) =
+proc advance(lex: YamlLexer[BaseLexer], step: int = 1) {.inline.} =
   lex.source.bufpos.inc(step)
   lex.c = lex.source.buf[lex.source.bufpos]
 
-template advance(lex: YamlLexer[StringSource], step: int = 1) =
+proc advance(lex: YamlLexer[StringSource], step: int = 1) {.inline.} =
   lex.source.pos.inc(step)
-  lex.c = lex.source.src[lex.source.pos]
+  if lex.source.pos >= lex.source.src.len: lex.c = EndOfFile
+  else: lex.c = lex.source.src[lex.source.pos]
 
 # lexer states
 
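
Note on the change above: the StringSource variant of advance now stops at the end of the input instead of indexing past it. A self-contained sketch of the same pattern, for reference only; MiniLexer is made up for illustration and is not the module's YamlLexer, and '\0' stands in for lexbase's EndOfFile sentinel:

# Illustration only: a string-backed cursor with the same bounds check as the
# StringSource advance above. '\0' plays the role of lexbase.EndOfFile.
type MiniLexer = object
  src: string
  pos: int
  c: char

proc advance(lex: var MiniLexer, step: int = 1) {.inline.} =
  lex.pos.inc(step)
  if lex.pos >= lex.src.len: lex.c = '\0'   # past the end: report EOF
  else: lex.c = lex.src[lex.pos]

var l = MiniLexer(src: "ab", pos: -1)
l.advance()      # l.c == 'a'
l.advance(2)     # index 2 is out of range, so l.c == '\0'
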
@@ -62,7 +63,7 @@ proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool
 proc blockStyle[T](lex: YamlLexer[T], t: var LexerToken): bool {.locks:0.}
 proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool
 proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool
-proc flowStyle[T](lex: YamlLexer[T], t: var LexerToken): bool {.locks:0.}
+proc flowStyle[T](lex: YamlLexer[T], t: var LexerToken): bool
 proc streamEnd[T](lex: YamlLexer[T], t: var LexerToken): bool
 
 # interface
@@ -92,6 +93,11 @@ const
   spaceOrLineEnd = {' ', '\t', '\l', '\c', EndOfFile}
   digits = {'0'..'9'}
   flowIndicators = {'[', ']', '{', '}', ','}
 
+  UTF8NextLine = toUTF8(0x85.Rune)
+  UTF8NonBreakingSpace = toUTF8(0xA0.Rune)
+  UTF8LineSeparator = toUTF8(0x2028.Rune)
+  UTF8ParagraphSeparator = toUTF8(0x2029.Rune)
+
 template debug(message: string) {.dirty.} =
   when defined(yamlDebug):
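
The four new constants are simply the UTF-8 encodings of the named code points, precomputed once so the escape handler can append them directly. A quick check of what they expand to (not part of the patch):

import unicode

# U+0085 (next line) and U+00A0 (non-breaking space) are two-byte sequences,
# U+2028 and U+2029 are three-byte sequences in UTF-8.
doAssert toUTF8(0x85.Rune) == "\xc2\x85"
doAssert toUTF8(0xA0.Rune) == "\xc2\xa0"
doAssert toUTF8(0x2028.Rune) == "\xe2\x80\xa8"
doAssert toUTF8(0x2029.Rune) == "\xe2\x80\xa9"
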
@@ -100,20 +106,24 @@ template debug(message: string) {.dirty.} =
 
 template lexCR(lex: YamlLexer[BaseLexer]) =
   lex.source.bufpos = lex.source.handleCR(lex.source.bufpos)
+  lex.c = lex.source.buf[lex.source.bufpos]
 
 template lexCR(lex: YamlLexer[StringSource]) =
   lex.source.pos.inc()
   if lex.source.src[lex.source.pos] == '\l': lex.source.pos.inc()
   lex.source.lineStart = lex.source.pos
   lex.source.line.inc()
+  lex.c = lex.source.src[lex.source.pos]
 
 template lexLF(lex: YamlLexer[BaseLexer]) =
   lex.source.bufpos = lex.source.handleLF(lex.source.bufpos)
+  lex.c = lex.source.buf[lex.source.bufpos]
 
 template lexLF(lex: YamlLexer[StringSource]) =
   lex.source.pos.inc()
   lex.source.lineStart = lex.source.pos
   lex.source.line.inc()
+  lex.c = lex.source.src[lex.source.pos]
 
 template lineNumber(lex: YamlLexer[BaseLexer]): int =
   lex.source.lineNumber
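
In the StringSource lexCR above, a '\l' directly after the '\c' is swallowed, so a CRLF pair counts as a single line break and lineStart/line stay consistent. A rough standalone illustration of that counting; countBreaks below is not part of the module:

# Counts line breaks the way lexCR/lexLF treat them: CR, LF and CRLF are each
# one break. Illustration only.
proc countBreaks(src: string): int =
  var pos = 0
  while pos < src.len:
    case src[pos]
    of '\c':
      pos.inc()
      if pos < src.len and src[pos] == '\l': pos.inc()  # fold CRLF into one
      result.inc()
    of '\l':
      pos.inc()
      result.inc()
    else: pos.inc()

doAssert countBreaks("a\c\lb\lc\cd") == 3
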
@@ -382,11 +392,127 @@ proc flowIndicator[T](lex: YamlLexer[T], indicator: LexerToken,
   when inFlow: lex.stored = flowStyle[T]
   else: lex.stored = blockStyle[T]
 
+proc addMultiple(s: var string, c: char, num: int) {.raises: [], inline.} =
+  for i in 1..num:
+    s.add(c)
+
+proc processQuotedWhitespace(lex: YamlLexer, newlines: var int) =
+  block outer:
+    let beforeSpace = lex.buf.len
+    while true:
+      case lex.c
+      of ' ', '\t': lex.buf.add(lex.c)
+      of '\l':
+        lex.lexLF()
+        break
+      of '\c':
+        lex.lexCR()
+        break
+      else: break outer
+      lex.advance()
+    lex.buf.setLen(beforeSpace)
+    while true:
+      case lex.c
+      of ' ', '\t': discard
+      of '\l':
+        lex.lexLF()
+        newlines.inc()
+        continue
+      of '\c':
+        lex.lexCR()
+        newlines.inc()
+        continue
+      else:
+        if newlines == 0: discard
+        elif newlines == 1: lex.buf.add(' ')
+        else: lex.buf.addMultiple('\l', newlines - 1)
+        break
+      lex.advance()
+
 proc singleQuotedScalar[T](lex: YamlLexer[T]) =
-  discard
+  debug("lex: singleQuotedScalar")
+  lex.advance()
+  while true:
+    case lex.c
+    of '\'':
+      lex.advance()
+      if lex.c == '\'': lex.buf.add('\'')
+      else: break
+    of EndOfFile: raise lex.generateError("Unfinished single quoted string")
+    of '\l', '\c', '\t', ' ':
+      var newlines = 1
+      lex.processQuotedWhitespace(newlines)
+      continue
+    else: lex.buf.add(lex.c)
+    lex.advance()
 
+proc unicodeSequence(lex: YamlLexer, length: int) =
+  debug("lex: unicodeSequence")
+  var unicodeChar = 0.int
+  for i in countup(0, length - 1):
+    lex.advance()
+    let digitPosition = length - i - 1
+    case lex.c
+    of EndOFFile, '\l', '\c':
+      raise lex.generateError("Unfinished unicode escape sequence")
+    of '0' .. '9':
+      unicodeChar = unicodechar or (int(lex.c) - 0x30) shl (digitPosition * 4)
+    of 'A' .. 'F':
+      unicodeChar = unicodechar or (int(lex.c) - 0x37) shl (digitPosition * 4)
+    of 'a' .. 'f':
+      unicodeChar = unicodechar or (int(lex.c) - 0x57) shl (digitPosition * 4)
+    else:
+      raise lex.generateError(
+          "Invalid character in unicode escape sequence: " &
+          escape("" & lex.c))
+  lex.buf.add(toUTF8(Rune(unicodeChar)))
+
 proc doubleQuotedScalar[T](lex: YamlLexer[T]) =
-  discard
+  debug("lex: doubleQuotedScalar")
+  lex.advance()
+  while true:
+    case lex.c
+    of EndOfFile:
+      raise lex.generateError("Unfinished double quoted string")
+    of '\\':
+      lex.advance()
+      case lex.c
+      of EndOfFile:
+        raise lex.generateError("Unfinished escape sequence")
+      of '0': lex.buf.add('\0')
+      of 'a': lex.buf.add('\x07')
+      of 'b': lex.buf.add('\x08')
+      of '\t', 't': lex.buf.add('\t')
+      of 'n': lex.buf.add('\l')
+      of 'v': lex.buf.add('\v')
+      of 'f': lex.buf.add('\f')
+      of 'r': lex.buf.add('\c')
+      of 'e': lex.buf.add('\e')
+      of ' ': lex.buf.add(' ')
+      of '"': lex.buf.add('"')
+      of '/': lex.buf.add('/')
+      of '\\': lex.buf.add('\\')
+      of 'N': lex.buf.add(UTF8NextLine)
+      of '_': lex.buf.add(UTF8NonBreakingSpace)
+      of 'L': lex.buf.add(UTF8LineSeparator)
+      of 'P': lex.buf.add(UTF8ParagraphSeparator)
+      of 'x': lex.unicodeSequence(2)
+      of 'u': lex.unicodeSequence(4)
+      of 'U': lex.unicodeSequence(8)
+      of '\l', '\c':
+        var newlines = 0
+        lex.processQuotedWhitespace(newlines)
+        continue
+      else: raise lex.generateError("Illegal character in escape sequence")
+    of '"':
+      lex.advance()
+      break
+    of '\l', '\c', '\t', ' ':
+      var newlines = 1
+      lex.processQuotedWhitespace(newlines)
+      continue
+    else: lex.buf.add(lex.c)
+    lex.advance()
 
 proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool =
   case lex.c
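
processQuotedWhitespace above implements YAML's flow folding for both quote styles: spaces and tabs in front of a line break are dropped, a single break becomes one space, and a run of n breaks becomes n - 1 line feeds. A standalone sketch of just that rule, separate from the patch's actual code path; foldBreaks is illustrative and not part of the module:

# Folding rule applied by the second loop of processQuotedWhitespace:
# 0 breaks -> nothing, 1 break -> a single space, n breaks -> n - 1 '\l'.
proc foldBreaks(newlines: int): string =
  if newlines == 1: return " "
  for i in 1 ..< newlines: result.add('\l')

doAssert foldBreaks(0) == ""
doAssert foldBreaks(1) == " "     # "a\nb"     folds to "a b"
doAssert foldBreaks(3) == "\l\l"  # "a\n\n\nb" folds to "a\l\lb"
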
@@ -452,12 +578,18 @@ proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool =
             break
           of space: discard
           else: break
       of lineEnd:
         lex.nextImpl = expectLineEnd[T]
         lex.stored = if lex.inFlow: flowStyle[T] else: blockStyle[T]
         break
+      of flowIndicators:
+        if lex.inFlow:
+          lex.nextImpl = lex.stored
+          break
       of ':':
-        if not lex.nextIsPlainSafe(lex.inFlow): break outer
+        if not lex.nextIsPlainSafe(lex.inFlow):
+          lex.nextImpl = blockStyleInline[T]
+          break outer
       else: discard
   t = ltScalarPart
   result = true
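
On the escape handling added earlier in this patch: unicodeSequence assembles the code point four bits per hex digit, using the ASCII offsets 0x30 ('0'), 0x37 ('A' minus 10) and 0x57 ('a' minus 10). A standalone sketch of the same arithmetic; decodeHex is illustrative and not part of the module:

import unicode

# Same digit arithmetic as unicodeSequence: each hex digit contributes
# 4 bits, most significant digit first.
proc decodeHex(digits: string): string =
  var codepoint = 0
  for i, c in digits:
    let shift = (digits.len - i - 1) * 4
    case c
    of '0'..'9': codepoint = codepoint or (int(c) - 0x30) shl shift
    of 'A'..'F': codepoint = codepoint or (int(c) - 0x37) shl shift
    of 'a'..'f': codepoint = codepoint or (int(c) - 0x57) shl shift
    else: raise newException(ValueError, "not a hex digit: " & c)
  result = toUTF8(Rune(codepoint))

doAssert decodeHex("41") == "A"      # as produced by \x41
doAssert decodeHex("0032") == "2"    # as produced by \u0032 in the test below
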
@@ -9,6 +9,8 @@ type
     case kind: LexerToken
     of tokensWithValue:
       value: string
+    of ltIndentation:
+      indentation: int
     else: discard
 
 proc assertEquals(input: string, expected: varargs[TokenWithValue]) =
@@ -18,13 +20,63 @@ proc assertEquals(input: string, expected: varargs[TokenWithValue]) =
     let t = lex.next()
     doAssert t == expectedToken.kind, "Wrong token kind: Expected " &
         $expectedToken.kind & ", got " & $t
-    if expectedToken.kind in tokensWithValue:
+    case expectedToken.kind
+    of tokensWithValue:
       doAssert lex.buf == expectedToken.value,
           "Wrong token content: Expected " & escape(expectedToken.value) &
           ", got " & escape(lex.buf)
       lex.buf = ""
+    of ltIndentation:
+      doAssert lex.indentation == expectedToken.indentation,
+          "Wrong indentation length: Expected " & $expectedToken.indentation &
+          ", got " & $lex.indentation
+    else: discard
 
 proc i(indent: int): TokenWithValue =
   TokenWithValue(kind: ltIndentation, indentation: indent)
 proc sp(v: string): TokenWithValue =
   TokenWithValue(kind: ltScalarPart, value: v)
+proc qs(v: string): TokenWithValue =
+  TokenWithValue(kind: ltQuotedScalar, value: v)
 proc se(): TokenWithValue = TokenWithValue(kind: ltStreamEnd)
 proc mk(): TokenWithValue = TokenWithValue(kind: ltMapKeyInd)
 proc mv(): TokenWithValue = TokenWithValue(kind: ltMapValInd)
 
 suite "Lexer":
   test "Empty document":
     assertEquals("", se())
     assertEquals("", se())
 
   test "Single-line scalar":
     assertEquals("scalar", i(0), sp("scalar"), se())
 
   test "Multiline scalar":
     assertEquals("scalar\l line two", i(0), sp("scalar"), i(2),
         sp("line two"), se())
 
   test "Single-line mapping":
     assertEquals("key: value", i(0), sp("key"), mv(), sp("value"), se())
 
   test "Multiline mapping":
     assertEquals("key:\n value", i(0), sp("key"), mv(), i(2), sp("value"),
         se())
 
   test "Explicit mapping":
     assertEquals("? key\n: value", i(0), mk(), sp("key"), i(0), mv(),
         sp("value"), se())
 
+  test "Single-line single-quoted scalar":
+    assertEquals("'quoted scalar'", i(0), qs("quoted scalar"), se())
+
+  test "Multiline single-quoted scalar":
+    assertEquals("'quoted\l multi line \l\lscalar'", i(0),
+        qs("quoted multi line\lscalar"), se())
+
+  test "Single-line double-quoted scalar":
+    assertEquals("\"quoted scalar\"", i(0), qs("quoted scalar"), se())
+
+  test "Multiline double-quoted scalar":
+    assertEquals("\"quoted\l multi line \l\lscalar\"", i(0),
+        qs("quoted multi line\lscalar"), se())
+
+  test "Escape sequences":
+    assertEquals(""""\n\x31\u0032\U00000033"""", i(0), qs("\l123"), se())
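
A note for reading the new test inputs: in Nim string literals, '\l' is the line feed character, so the "Multiline single-quoted scalar" input is a document with one single break and one double break. Per the folding rule above, the single break and its surrounding spaces fold to one space and the double break becomes a single '\l', which matches the expected qs("quoted multi line\lscalar"). Spelled out (illustrative only, mirrors the test above):

# '\l' is just LF (0x0A); the test input and its expectation, byte for byte.
doAssert "'quoted\l multi line \l\lscalar'" ==
    "'quoted\x0A multi line \x0A\x0Ascalar'"
doAssert "quoted multi line\lscalar" == "quoted multi line\x0Ascalar"
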