First compiling version (incomplete)

This commit is contained in:
Felix Krause 2016-09-10 12:38:03 +02:00
parent d51872f9e7
commit 68a157e173
2 changed files with 109 additions and 56 deletions

View File

@ -17,14 +17,17 @@ type
lexCR(c) lexCR(c)
lexLF(c) lexLF(c)
YamlLexer*[T: SourceProvider] = ref object # YamlLexer*[T: SourceProvider] = ref object # not possible -> compiler bug
YamlLexer*[T] = ref object
source: T source: T
inFlow: bool inFlow: bool
literalEndIndent: int literalEndIndent: int
nextImpl, stored: proc(lex: YamlLexer[T], t: var LexerToken): bool nextImpl, stored: LexerState[T]
c: char
buf*: string not nil buf*: string not nil
indentation*: int indentation*: int
c: char
LexerState[T] = proc(lex: YamlLexer[T], t: var LexerToken): bool
LexerToken* = enum LexerToken* = enum
ltYamlDirective, ltYamlVersion, ltTagDirective, ltTagShorthand, ltYamlDirective, ltYamlVersion, ltTagDirective, ltTagShorthand,
@ -35,6 +38,8 @@ type
ltLiteralTag, ltTagSuffix, ltAnchor, ltAlias ltLiteralTag, ltTagSuffix, ltAnchor, ltAlias
YamlLexerError* = object of Exception YamlLexerError* = object of Exception
line, column: int
lineContent: string
# templates # templates
@ -54,28 +59,30 @@ proc tagShorthand[T](lex: YamlLexer[T], t: var LexerToken): bool
proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool
proc unknownDirParams[T](lex: YamlLexer[T], t: var LexerToken): bool proc unknownDirParams[T](lex: YamlLexer[T], t: var LexerToken): bool
proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool
proc blockStyle[T](lex: YamlLexer[T], t: var LexerToken): bool proc blockStyle[T](lex: YamlLexer[T], t: var LexerToken): bool {.locks:0.}
proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool
proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool
proc flowStyle[T](lex: YamlLexer[T], t: var LexerToken): bool proc flowStyle[T](lex: YamlLexer[T], t: var LexerToken): bool {.locks:0.}
proc streamEnd[T](lex: YamlLexer[T], t: var LexerToken): bool proc streamEnd[T](lex: YamlLexer[T], t: var LexerToken): bool
# interface # interface
proc newYamlLexer*(source: Stream): YamlLexer[BaseLexer] = proc newYamlLexer*(source: Stream): YamlLexer[BaseLexer] =
result = YamlLexer[T](source: BaseLexer(), inFlow: false, buf: "", result = YamlLexer[BaseLexer](source: BaseLexer(), inFlow: false, buf: "")
nextImpl: outsideDoc[T])
result.source.open(source) result.source.open(source)
result.c = result.source.buf[result.source.bufpos] result.c = result.source.buf[result.source.bufpos]
proc newYamlLexer*[T: StringSource](source: string, startAt: int = 0): proc newYamlLexer*(source: string, startAt: int = 0):
YamlLexer[T] = YamlLexer[StringSource] =
result = YamlLexer[T](nextImpl: outsideDoc, buf: "", source: result = YamlLexer[StringSource](buf: "", source:
StringSource(src: source, pos: startAt, lineStart: startAt, line: 1), StringSource(src: source, pos: startAt, lineStart: startAt, line: 1),
inFlow: false, c: source[startAt]) inFlow: false, c: source[startAt])
# Puts the lexer into its start state by pointing nextImpl at outsideDoc[T].
# next() dispatches through nextImpl, so this has to run before the first
# call to next().
proc init*[T](lex: YamlLexer[T]) =
lex.nextImpl = outsideDoc[T]
proc next*(lex: YamlLexer): LexerToken = proc next*(lex: YamlLexer): LexerToken =
while not lex.nextImpl(result): discard while not lex.nextImpl(lex, result): discard
# implementation # implementation
@ -91,30 +98,53 @@ template debug(message: string) {.dirty.} =
try: styledWriteLine(stdout, fgBlue, message) try: styledWriteLine(stdout, fgBlue, message)
except IOError: discard except IOError: discard
template lexCR(lex: YamlLexer[BaseLexer]) =
  ## Carriage-return handling for a `BaseLexer` source: `handleCR` is asked
  ## for the position to continue from and that position is stored back
  ## into `bufpos`.
  lex.source.bufpos = handleCR(lex.source, lex.source.bufpos)

template lexCR(lex: YamlLexer[StringSource]) =
  ## Carriage-return handling for a `StringSource`: steps over the current
  ## character, also steps over a directly following line feed, then marks
  ## the new position as the start of the next line and bumps the line
  ## counter.
  inc(lex.source.pos)
  if lex.source.src[lex.source.pos] == '\l':
    inc(lex.source.pos)
  lex.source.lineStart = lex.source.pos
  inc(lex.source.line)
template lexLF(lex: YamlLexer[BaseLexer]) =
  ## Line-feed handling for a `BaseLexer` source: `handleLF` is asked for
  ## the position to continue from and that position is stored back into
  ## `bufpos`.
  lex.source.bufpos = handleLF(lex.source, lex.source.bufpos)

template lexLF(lex: YamlLexer[StringSource]) =
  ## Line-feed handling for a `StringSource`: steps over the current
  ## character, marks the new position as the start of the next line and
  ## bumps the line counter.
  inc(lex.source.pos)
  lex.source.lineStart = lex.source.pos
  inc(lex.source.line)
template lineNumber(lex: YamlLexer[BaseLexer]): int =
  ## Current line as reported by the `lineNumber` field of the `BaseLexer`
  ## source.
  (lex.source.lineNumber)

template lineNumber(lex: YamlLexer[StringSource]): int =
  ## Current line as reported by the `line` field of the `StringSource`.
  (lex.source.line)
template columnNumber(lex: YamlLexer[BaseLexer]): int =
  ## 1-based column of the current buffer position of a `BaseLexer` source.
  # lexbase.getColNumber takes the buffer position as an explicit argument
  # and yields a 0-based column, hence the explicit bufpos and the + 1.
  lex.source.getColNumber(lex.source.bufpos) + 1

template columnNumber(lex: YamlLexer[StringSource]): int =
  ## 1-based column of the current position of a `StringSource`, computed as
  ## the distance from the start of the current line plus one.
  lex.source.pos - lex.source.lineStart + 1
template currentLine(lex: YamlLexer[BaseLexer]): string =
  ## Text of the current line of a `BaseLexer` source; `getCurrentLine` is
  ## called with its marker argument set to `true`.
  lex.source.getCurrentLine(true)

template currentLine(lex: YamlLexer[StringSource]): string =
  ## Text of the current line of a `StringSource`, followed by a second line
  ## that holds a `^` underneath the current position.
  var content = ""
  var i = lex.source.lineStart
  # NOTE(review): assumes `lineEnd` also matches whatever terminates the
  # input, otherwise this scan runs past the last line - confirm.
  while lex.source.src[i] notin lineEnd:
    content.add(lex.source.src[i])
    inc(i)
  # The caret is padded with the 0-based offset into the line. Padding with
  # the 1-based column number would place it one character too far right.
  content.add("\n" & spaces(lex.source.pos - lex.source.lineStart) & "^\n")
  content
proc generateError(lex: YamlLexer, message: string): proc generateError(lex: YamlLexer, message: string):
ref YamlLexerError {.raises: [].} = ref YamlLexerError {.raises: [].} =
result = newException(YamlLexerError, message) result = newException(YamlLexerError, message)
result.line = lex.lineNumber result.line = lex.lineNumber
result.column = lex.bufpos + 1 result.column = lex.columnNumber
result.lineContent = lex.getCurrentLine(false) & result.lineContent = lex.currentLine
repeat(' ', lex.getColNumber(lex.bufpos)) & "^\n"
template handleCR(lex: YamlLexer[BaseLexer]) =
lex.source.bufpos = lex.source.handleCR(lex.source.bufpos)
template handleCR(lex: YamlLexer[StringSource]) =
lex.source.pos.inc()
if lex.source.src[lex.source.pos] == '\l': lex.source.pos.inc()
lex.source.lineStart = lex.source.pos
lex.source.row.line.inc()
template handleLF(lex: YamlLexer[BaseLexer]) =
lex.source.bufpos = lex.source.handleLF(lex.source.bufpos)
template handleLF(lex: YamlLexer[StringSource]) =
lex.source.pos.inc()
lex.source.lineStart = lex.source.pos
lex.source.row.line.inc()
proc directiveName(lex: YamlLexer) = proc directiveName(lex: YamlLexer) =
while lex.c notin spaceOrLineEnd: while lex.c notin spaceOrLineEnd:
@ -123,7 +153,7 @@ proc directiveName(lex: YamlLexer) =
proc yamlVersion[T](lex: YamlLexer[T], t: var LexerToken): bool = proc yamlVersion[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: yamlVersion") debug("lex: yamlVersion")
while lex.c in space: lex.anvance() while lex.c in space: lex.advance()
if lex.c notin digits: raise lex.generateError("Invalid YAML version number") if lex.c notin digits: raise lex.generateError("Invalid YAML version number")
lex.buf.add(lex.c) lex.buf.add(lex.c)
lex.advance() lex.advance()
@ -153,10 +183,12 @@ template nextIsPlainSafe(lex: YamlLexer[BaseLexer], inFlow: bool): bool =
else: result = true else: result = true
template nextIsPlainSafe(lex: YamlLexer[StringSource], inFlow: bool): bool = template nextIsPlainSafe(lex: YamlLexer[StringSource], inFlow: bool): bool =
var result: bool
case lex.source.src[lex.source.pos + 1] case lex.source.src[lex.source.pos + 1]
of spaceOrLineEnd: result = false of spaceOrLineEnd: result = false
of flowIndicators: result = not inFlow of flowIndicators: result = not inFlow
else: result = true else: result = true
result
proc tagShorthand[T](lex: YamlLexer[T], t: var LexerToken): bool = proc tagShorthand[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: tagShorthand") debug("lex: tagShorthand")
@ -184,7 +216,7 @@ proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool =
while lex.c in space: lex.advance() while lex.c in space: lex.advance()
if lex.c == '!': if lex.c == '!':
lex.buf.add(lex.c) lex.buf.add(lex.c)
lex.avance() lex.advance()
while true: while true:
case lex.c case lex.c
of spaceOrLineEnd: break of spaceOrLineEnd: break
@ -196,10 +228,7 @@ proc tagUri[T](lex: YamlLexer[T], t: var LexerToken): bool =
escape("" & lex.c)) escape("" & lex.c))
proc unknownDirParams[T](lex: YamlLexer[T], t: var LexerToken): bool = proc unknownDirParams[T](lex: YamlLexer[T], t: var LexerToken): bool =
var c = lex.curChar() while lex.c notin lineEnd + {'#'}: lex.advance()
while c notin lineEnd + {'#'}:
lex.advance()
c = lex.curChar()
t = ltUnknownDirectiveParams t = ltUnknownDirectiveParams
result = true result = true
lex.stored = outsideDoc[T] lex.stored = outsideDoc[T]
@ -208,7 +237,7 @@ proc unknownDirParams[T](lex: YamlLexer[T], t: var LexerToken): bool =
proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool = proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
debug("lex: expectLineEnd") debug("lex: expectLineEnd")
result = false result = false
while lex.c in space: lex.anvance() while lex.c in space: lex.advance()
while true: while true:
case lex.c case lex.c
of '#': of '#':
@ -218,11 +247,11 @@ proc expectLineEnd[T](lex: YamlLexer[T], t: var LexerToken): bool =
lex.nextImpl = streamEnd[T] lex.nextImpl = streamEnd[T]
break break
of '\l': of '\l':
lex.handleLF() lex.lexLF()
lex.nextImpl = lex.stored lex.nextImpl = lex.stored
break break
of '\c': of '\c':
lex.handleCR() lex.lexCR()
lex.nextImpl = lex.stored lex.nextImpl = lex.stored
break break
else: else:
@ -275,10 +304,16 @@ proc outsideDoc[T](lex: YamlLexer[T], t: var LexerToken): bool =
lex.advance() lex.advance()
lex.directiveName() lex.directiveName()
case lex.buf case lex.buf
of "YAML": lex.nextImpl = yamlVersion[T] of "YAML":
of "TAG": lex.nextImpl = tagShorthand[T] t = ltYamlDirective
else: lex.nextImpl = unknownDirParams[T] lex.nextImpl = yamlVersion[T]
break of "TAG":
t = ltTagDirective
lex.nextImpl = tagShorthand[T]
else:
t = ltUnknownDirective
lex.nextImpl = unknownDirParams[T]
return true
of '-': of '-':
lex.possibleDirectivesEnd(t) lex.possibleDirectivesEnd(t)
return true return true
@ -347,6 +382,12 @@ proc flowIndicator[T](lex: YamlLexer[T], indicator: LexerToken,
when inFlow: lex.stored = flowStyle[T] when inFlow: lex.stored = flowStyle[T]
else: lex.stored = blockStyle[T] else: lex.stored = blockStyle[T]
# Placeholder for lexing a single-quoted scalar. The body is a bare
# `discard`, so calling it currently does nothing.
proc singleQuotedScalar[T](lex: YamlLexer[T]) =
discard
# Placeholder for lexing a double-quoted scalar. The body is a bare
# `discard`, so calling it currently does nothing.
proc doubleQuotedScalar[T](lex: YamlLexer[T]) =
discard
proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool = proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool =
case lex.c case lex.c
of ':': result = lex.possibleIndicatorChar(ltMapValInd, t) of ':': result = lex.possibleIndicatorChar(ltMapValInd, t)
@ -367,10 +408,10 @@ proc blockStyleInline[T](lex: YamlLexer[T], t: var LexerToken): bool =
of '>', '|': of '>', '|':
# TODO # TODO
result = true result = true
of '{': result = lex.flowIndicator(ltBraceOpen, t) of '{': result = lex.flowIndicator(ltBraceOpen, t, false)
of '}': result = lex.flowIndicator(ltBraceClose, t) of '}': result = lex.flowIndicator(ltBraceClose, t, false)
of '[': result = lex.flowIndicator(ltBracketOpen, t) of '[': result = lex.flowIndicator(ltBracketOpen, t, false)
of ']': result = lex.flowIndicator(ltBracketClose, t) of ']': result = lex.flowIndicator(ltBracketClose, t, false)
else: else:
lex.nextImpl = plainScalarPart[T] lex.nextImpl = plainScalarPart[T]
lex.stored = blockStyleInline[T] lex.stored = blockStyleInline[T]
@ -388,14 +429,14 @@ proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool =
while true: while true:
lex.buf.add(lex.c) lex.buf.add(lex.c)
lex.advance() lex.advance()
case lex.ch case lex.c
of lineEnd + {'#'}: of lineEnd + {'#'}:
lex.buf.setLen(lenBeforeSpace) lex.buf.setLen(lenBeforeSpace)
lex.nextImpl = expectLineEnd[T] lex.nextImpl = expectLineEnd[T]
lex.stored = if lex.inFlow: flowStyle[T] else: blockStyle[T] lex.stored = if lex.inFlow: flowStyle[T] else: blockStyle[T]
break outer break outer
of ':': of ':':
if lex.nextIsPlainSafe(): break if lex.nextIsPlainSafe(lex.inFlow): break
else: else:
lex.buf.setLen(lenBeforeSpace) lex.buf.setLen(lenBeforeSpace)
lex.nextImpl = lex.stored lex.nextImpl = lex.stored
@ -416,7 +457,7 @@ proc plainScalarPart[T](lex: YamlLexer[T], t: var LexerToken): bool =
lex.nextImpl = lex.stored lex.nextImpl = lex.stored
break break
of ':': of ':':
if not lex.nextIsPlainSafe(): break outer if not lex.nextIsPlainSafe(lex.inFlow): break outer
else: discard else: discard
t = ltScalarPart t = ltScalarPart
result = true result = true

View File

@ -1,18 +1,30 @@
import ../private/lex import ../private/lex
import unittest import unittest, strutils
const tokensWithValue = [ltScalarPart, ltQuotedScalar]
type type
TokenWithValue = object TokenWithValue = object
case kind: LexerToken case kind: LexerToken
of ltScalarPart, ltQuotedScalar: of tokensWithValue:
value: string value: string
else: discard else: discard
# Test helper: lexes `input` and compares the tokens it yields, in order,
# against `expected`. Fails through doAssert on the first token whose kind
# differs or, for kinds listed in tokensWithValue, whose text differs.
# Only as many tokens as `expected` contains are requested from the lexer.
proc assertEquals(input: string, expected: varargs[TokenWithValue]) = proc assertEquals(input: string, expected: varargs[TokenWithValue]) =
let lex = newYamlLexer[StringSource](input) let lex = newYamlLexer(input)
# init() installs the lexer's first state, which next() dispatches through
lex.init()
for expectedToken in expected:
let t = lex.next()
doAssert t == expectedToken.kind, "Wrong token kind: Expected " &
$expectedToken.kind & ", got " & $t
if expectedToken.kind in tokensWithValue:
# the text of the token is read from the lexer's buf field
doAssert lex.buf == expectedToken.value,
"Wrong token content: Expected " & escape(expectedToken.value) &
", got " & escape(lex.buf)
proc se(): TokenWithValue =
  ## Shorthand used by the tests for an expected token of kind
  ## `ltStreamEnd`.
  result = TokenWithValue(kind: ltStreamEnd)
suite "Lexer": suite "Lexer":
test "Empty document": test "Empty document":
assertEquals("", se())