Added type hinting in lexer

This commit is contained in:
Felix Krause 2015-12-23 10:28:58 +01:00
parent a9a1a67bc6
commit 5c0e9f570b
2 changed files with 248 additions and 19 deletions

View File

@ -44,8 +44,8 @@ type
ylDirective, ylDefineTagHandle, ylDefineTagURI, ylMajorVersion, ylDirective, ylDefineTagHandle, ylDefineTagURI, ylMajorVersion,
ylMinorVersion, ylUnknownDirectiveParam, ylDirectiveComment, ylMinorVersion, ylUnknownDirectiveParam, ylDirectiveComment,
# scalar reading states # scalar reading states
ylPlainScalar, ylSingleQuotedScalar, ylDoublyQuotedScalar, ylPlainScalar, ylPlainScalarNone, ylSingleQuotedScalar,
ylEscape, ylBlockScalar, ylBlockScalarHeader, ylDoublyQuotedScalar, ylEscape, ylBlockScalar, ylBlockScalarHeader,
ylSpaceAfterPlainScalar, ylSpaceAfterQuotedScalar, ylSpaceAfterPlainScalar, ylSpaceAfterQuotedScalar,
# indentation # indentation
ylIndentation, ylIndentation,
@ -58,6 +58,14 @@ type
# anchoring # anchoring
ylAnchor, ylAlias ylAnchor, ylAlias
# States of the automaton that classifies a plain scalar while it is lexed.
# ythInitial is the state before the first character has been fed in.
# ythN..ythNULL, ythF..ythFALSE and ythT..ythTRUE record how much of the
# literals "null", "false" and "true" has been matched so far.
# ythMinus, yth0, ythInt, ythDecimal, ythNumE, ythNumEPlusMinus and
# ythExponent record which part of a number has been matched so far.
# ythNone means the characters seen so far match none of the above.
# advanceTypeHint performs the transitions; yieldScalarPart maps the state
# reached at the end of the scalar to a YamlLexerTypeHint.
YamlLexerTypeHintState = enum
ythInitial, ythN, ythNU, ythNUL, ythNULL, ythF, ythFA, ythFAL, ythFALS,
ythFALSE, ythT, ythTR, ythTRU, ythTRUE, ythMinus, yth0, ythInt,
ythDecimal, ythNumE, ythNumEPlusMinus, ythExponent, ythNone
# Exported classification of a scalar part. yieldScalarPart stores one of
# these values in the lexer's `typeHint` field before yielding
# yamlScalarPart; yTypeString is the fallback it uses whenever the scalar
# was not recognised as an integer, float, boolean or null.
YamlLexerTypeHint* = enum
yTypeInteger, yTypeFloat, yTypeBoolean, yTypeNull, yTypeString
YamlLexer* = object of BaseLexer YamlLexer* = object of BaseLexer
indentations: seq[int] indentations: seq[int]
encoding: Encoding encoding: Encoding
@ -65,6 +73,7 @@ type
charoffset: int charoffset: int
content*: string # my.content of the last returned token. content*: string # my.content of the last returned token.
line*, column*: int line*, column*: int
typeHint*: YamlLexerTypeHint
const const
UTF8NextLine = toUTF8(Rune(0x85)) UTF8NextLine = toUTF8(Rune(0x85))
@ -145,10 +154,28 @@ template yieldToken(kind: YamlLexerToken) {.dirty.} =
echo "Lexer token: yamlScalar(\"", my.content, "\")" echo "Lexer token: yamlScalar(\"", my.content, "\")"
else: else:
echo "Lexer token: ", kind echo "Lexer token: ", kind
yield kind yield kind
my.content = "" my.content = ""
template yieldScalarPart() {.dirty.} =
  ## Emits a `yamlScalarPart` token for the text collected in `my.content`.
  ##
  ## Dirty template: it reads `typeHintState` and writes to `my` at the call
  ## site. Before yielding, the current `typeHintState` is translated into
  ## `my.typeHint`; every state that does not denote a completely matched
  ## null, boolean, integer or float falls back to `yTypeString`.
  ## `my.content` is cleared once the token has been yielded.
  my.typeHint =
    case typeHintState
    of ythNULL: yTypeNull
    of ythTRUE, ythFALSE: yTypeBoolean
    of yth0, ythInt: yTypeInteger
    of ythDecimal, ythExponent: yTypeFloat
    else: yTypeString
  when defined(yamlDebug):
    # Debug builds trace the token together with the hint just computed.
    echo "Lexer token: yamlScalarPart(\"", my.content, "\".",
         my.typeHint, ")"
  yield yamlScalarPart
  my.content = ""
template yieldError(message: string) {.dirty.} = template yieldError(message: string) {.dirty.} =
when defined(yamlDebug): when defined(yamlDebug):
echo "Lexer error: " & message echo "Lexer error: " & message
@ -171,6 +198,138 @@ template handleLF() {.dirty.} =
template `or`(r: Rune, i: int): Rune = template `or`(r: Rune, i: int): Rune =
cast[Rune](cast[int](r) or i) cast[Rune](cast[int](r) or i)
template advanceTypeHint(ch: char) {.dirty.} =
  ## Feeds one character of a plain scalar into the type hint automaton.
  ##
  ## Dirty template: it reads and updates `typeHintState` and may assign
  ## `state` at the call site. The next automaton state is selected from the
  ## character `ch` and the current `typeHintState`. Whenever the
  ## combination is not part of "null", "true", "false" or a number, the
  ## automaton drops to `ythNone` and the lexer `state` is switched to
  ## `ylPlainScalarNone`.
  typeHintState =
    case ch
    of '.':
      # A decimal point is only accepted directly after the integer part.
      if typeHintState in {yth0, ythInt}: ythDecimal
      else: ythNone
    of '+':
      # A plus sign is only accepted as the sign of an exponent.
      if typeHintState == ythNumE: ythNumEPlusMinus
      else: ythNone
    of '-':
      # A minus sign may start the scalar or sign an exponent.
      case typeHintState
      of ythInitial: ythMinus
      of ythNumE: ythNumEPlusMinus
      else: ythNone
    of '0':
      case typeHintState
      of ythInitial, ythMinus: yth0
      of ythNumE, ythNumEPlusMinus: ythExponent
      # Further digits leave these states unchanged.
      of ythInt, ythDecimal, ythExponent: typeHintState
      else: ythNone
    of '1'..'9':
      case typeHintState
      of ythInitial, ythMinus: ythInt
      of ythNumE, ythNumEPlusMinus: ythExponent
      # Further digits leave these states unchanged.
      of ythInt, ythDecimal, ythExponent: typeHintState
      else: ythNone
    of 'a':
      if typeHintState == ythF: ythFA
      else: ythNone
    of 'e':
      # 'e' either introduces an exponent or completes "true" / "false".
      case typeHintState
      of yth0, ythInt, ythDecimal: ythNumE
      of ythTRU: ythTRUE
      of ythFALS: ythFALSE
      else: ythNone
    of 'E':
      if typeHintState in {yth0, ythInt, ythDecimal}: ythNumE
      else: ythNone
    of 'f':
      if typeHintState == ythInitial: ythF
      else: ythNone
    of 'l':
      case typeHintState
      of ythNU: ythNUL
      of ythNUL: ythNULL
      of ythFA: ythFAL
      else: ythNone
    of 'n':
      if typeHintState == ythInitial: ythN
      else: ythNone
    of 'r':
      if typeHintState == ythT: ythTR
      else: ythNone
    of 's':
      if typeHintState == ythFAL: ythFALS
      else: ythNone
    of 't':
      if typeHintState == ythInitial: ythT
      else: ythNone
    of 'u':
      case typeHintState
      of ythN: ythNU
      of ythTR: ythTRU
      else: ythNone
    else:
      ythNone
  # ythNone is produced only by the fallback branches above, so this single
  # check covers every "no match" case.
  if typeHintState == ythNone:
    state = ylPlainScalarNone
iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} = iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
var var
# the following three values are used for parsing escaped unicode chars # the following three values are used for parsing escaped unicode chars
@ -188,6 +347,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
# Lexer must know whether it parses block or flow style. Therefore, # Lexer must know whether it parses block or flow style. Therefore,
# it counts the number of open flow arrays / maps here # it counts the number of open flow arrays / maps here
state = ylInitial # lexer state state = ylInitial # lexer state
typeHintState = ythInitial # for giving type hints of plain scalars
lastIndentationLength = 0 lastIndentationLength = 0
# after parsing the indentation of the line, this will hold the # after parsing the indentation of the line, this will hold the
# indentation length of the current line. Needed for checking where # indentation length of the current line. Needed for checking where
@ -260,10 +420,20 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
yieldToken(yamlLineStart) yieldToken(yamlLineStart)
my.content = tmp my.content = tmp
my.column = curPos my.column = curPos
state = ylPlainScalar state = ylPlainScalarNone
typeHintState = ythNone
continue continue
else: else:
state = ylPlainScalar let tmp = my.content
my.content = ""
yieldToken(yamlLineStart)
my.content = tmp
if my.content.len == 1:
typeHintState = ythMinus
state = ylPlainScalar
else:
typeHintState = ythNone
state = ylPlainScalarNone
continue continue
of ylDots: of ylDots:
case c case c
@ -275,10 +445,12 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
yieldToken(yamlDocumentEnd) yieldToken(yamlDocumentEnd)
state = ylDirectiveLineEnd state = ylDirectiveLineEnd
else: else:
state = ylPlainScalar state = ylPlainScalarNone
typeHintState = ythNone
continue continue
else: else:
state = ylPlainScalar state = ylPlainScalarNone
typeHintState = ythNone
continue continue
of ylDirectiveLineEnd: of ylDirectiveLineEnd:
case c case c
@ -432,7 +604,35 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylPlainScalar: of ylPlainScalar:
case c case c
of EndOfFile, '\r', '\x0A': of EndOfFile, '\r', '\x0A':
yieldToken(yamlScalarPart) yieldScalarPart()
state = ylLineEnd
continue
of ':':
lastSpecialChar = c
state = ylSpaceAfterPlainScalar
of ' ':
state = ylSpaceAfterPlainScalar
continue
of ',':
if flowDepth > 0:
lastSpecialChar = c
state = ylSpaceAfterPlainScalar
else:
my.content.add(c)
state = ylPlainScalarNone
typeHintState = ythNone
of '[', ']', '{', '}':
yieldScalarPart()
state = ylInitialInLine
continue
else:
advanceTypeHint(c)
my.content.add(c)
of ylPlainScalarNone:
case c
of EndOfFile, '\r', '\x0A':
yieldScalarPart()
state = ylLineEnd state = ylLineEnd
continue continue
of ':': of ':':
@ -448,7 +648,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
else: else:
my.content.add(c) my.content.add(c)
of '[', ']', '{', '}': of '[', ']', '{', '}':
yieldToken(yamlScalarPart) yieldScalarPart()
state = ylInitialInLine state = ylInitialInLine
continue continue
else: else:
@ -458,20 +658,21 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
if lastSpecialChar != '\0': if lastSpecialChar != '\0':
case c case c
of ' ', '\t', EndOfFile, '\r', '\x0A': of ' ', '\t', EndOfFile, '\r', '\x0A':
yieldToken(yamlScalarPart) yieldScalarPart()
state = ylInitialInLine state = ylInitialInLine
else: else:
my.content.add(trailingSpace) my.content.add(trailingSpace)
my.content.add(lastSpecialChar) my.content.add(lastSpecialChar)
lastSpecialChar = '\0' lastSpecialChar = '\0'
trailingSpace = "" trailingSpace = ""
state = ylPlainScalar state = ylPlainScalarNone
typeHintState = ythNone
continue continue
case c case c
of EndOfFile, '\r', '\x0A': of EndOfFile, '\r', '\x0A':
trailingSpace = "" trailingSpace = ""
yieldToken(yamlScalarPart) yieldScalarPart()
state = ylLineEnd state = ylLineEnd
continue continue
of ' ', '\t': of ' ', '\t':
@ -483,7 +684,8 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content.add(trailingSpace) my.content.add(trailingSpace)
my.content.add(c) my.content.add(c)
trailingSpace = "" trailingSpace = ""
state = ylPlainScalar state = ylPlainScalarNone
typeHintState = ythNone
of ':', '#': of ':', '#':
lastSpecialChar = c lastSpecialChar = c
of '[', ']', '{', '}': of '[', ']', '{', '}':
@ -495,7 +697,8 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content.add(trailingSpace) my.content.add(trailingSpace)
my.content.add(c) my.content.add(c)
trailingSpace = "" trailingSpace = ""
state = ylPlainScalar state = ylPlainScalarNone
typeHintState = ythNone
of ylInitialInLine: of ylInitialInLine:
if lastSpecialChar != '\0': if lastSpecialChar != '\0':
@ -538,6 +741,8 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
lastSpecialChar = '\0' lastSpecialChar = '\0'
my.column = curPos - 1 my.column = curPos - 1
state = ylPlainScalar state = ylPlainScalar
typeHintState = ythInitial
advanceTypeHint(lastSpecialChar)
continue continue
case c case c
of '\r', '\x0A', EndOfFile: of '\r', '\x0A', EndOfFile:
@ -550,6 +755,8 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content = "" & c my.content = "" & c
my.column = curPos my.column = curPos
state = ylPlainScalar state = ylPlainScalar
typeHintState = ythInitial
advanceTypeHint(c)
of '[': of '[':
inc(flowDepth) inc(flowDepth)
yieldToken(yamlOpeningBracket) yieldToken(yamlOpeningBracket)
@ -590,6 +797,8 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content = "" & c my.content = "" & c
my.column = curPos my.column = curPos
state = ylPlainScalar state = ylPlainScalar
typeHintState = ythInitial
advanceTypeHint(c)
of '?', ':': of '?', ':':
my.column = curPos my.column = curPos
lastSpecialChar = c lastSpecialChar = c
@ -605,6 +814,8 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content = "" & c my.content = "" & c
my.column = curPos my.column = curPos
state = ylPlainScalar state = ylPlainScalar
typeHintState = ythInitial
advanceTypeHint(c)
of ylComment, ylDirectiveComment: of ylComment, ylDirectiveComment:
case c case c
of EndOfFile, '\r', '\x0A': of EndOfFile, '\r', '\x0A':
@ -823,7 +1034,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylBlockScalar: of ylBlockScalar:
case c case c
of EndOfFile, '\r', '\x0A': of EndOfFile, '\r', '\x0A':
yieldToken(yamlScalarPart) yieldScalarPart()
state = ylLineEnd state = ylLineEnd
continue continue
else: else:

View File

@ -3,7 +3,8 @@ import streams, unicode
import unittest import unittest
type BasicLexerToken = tuple[kind: YamlLexerToken, content: string] type BasicLexerToken = tuple[kind: YamlLexerToken, content: string,
typeHint: YamlLexerTypeHint]
template ensure(input: string, expected: openarray[BasicLexerToken]) = template ensure(input: string, expected: openarray[BasicLexerToken]) =
var var
@ -30,13 +31,20 @@ template ensure(input: string, expected: openarray[BasicLexerToken]) =
expected[i].content, "\", got \"", lex.content, "\")" expected[i].content, "\", got \"", lex.content, "\")"
fail() fail()
break break
if token == yamlScalarPart:
if lex.typeHint != expected[i].typeHint:
echo "wrong type hint (expected ", expected[i].typeHint,
", got ", lex.typeHint, ")"
fail()
break
inc(i) inc(i)
if i < expected.len: if i < expected.len:
echo "received less tokens than expected (first missing = ", echo "received less tokens than expected (first missing = ",
expected[i].kind, ")" expected[i].kind, ")"
proc t(kind: YamlLexerToken, content: string): BasicLexerToken = proc t(kind: YamlLexerToken, content: string,
(kind: kind, content: content) typeHint: YamlLexerTypeHint = yTypeString): BasicLexerToken =
(kind: kind, content: content, typeHint: typeHint)
suite "Lexing": suite "Lexing":
test "Lexing: YAML Directive": test "Lexing: YAML Directive":
@ -195,4 +203,14 @@ foo:
ensure("!<tag:http://example.com/str> tagged", ensure("!<tag:http://example.com/str> tagged",
[t(yamlLineStart, ""), [t(yamlLineStart, ""),
t(yamlVerbatimTag, "tag:http://example.com/str"), t(yamlVerbatimTag, "tag:http://example.com/str"),
t(yamlScalarPart, "tagged"), t(yamlStreamEnd, nil)]) t(yamlScalarPart, "tagged"), t(yamlStreamEnd, nil)])
test "Lexing: Type hints":
  # One plain scalar per input line; each row of the expected token list
  # below corresponds to one of those lines.
  const hintInput = "false\nnull\nstring\n-13\n42.25\n-4e+3\n5.42e78"
  ensure(hintInput, [
      t(yamlLineStart, ""), t(yamlScalarPart, "false", yTypeBoolean),
      t(yamlLineStart, ""), t(yamlScalarPart, "null", yTypeNull),
      t(yamlLineStart, ""), t(yamlScalarPart, "string", yTypeString),
      t(yamlLineStart, ""), t(yamlScalarPart, "-13", yTypeInteger),
      t(yamlLineStart, ""), t(yamlScalarPart, "42.25", yTypeFloat),
      t(yamlLineStart, ""), t(yamlScalarPart, "-4e+3", yTypeFloat),
      t(yamlLineStart, ""), t(yamlScalarPart, "5.42e78", yTypeFloat),
      t(yamlStreamEnd, nil)])