Refactoring: Extracted type guessing from lexer

This commit is contained in:
Felix Krause 2016-01-14 19:58:38 +01:00
parent 6130c24370
commit 551c8b0dd1
10 changed files with 267 additions and 369 deletions

View File

@ -19,8 +19,7 @@ proc `==`*(left: YamlStreamEvent, right: YamlStreamEvent): bool =
of yamlScalar:
result = left.scalarAnchor == right.scalarAnchor and
left.scalarTag == right.scalarTag and
left.scalarContent == right.scalarContent and
left.scalarType == right.scalarType
left.scalarContent == right.scalarContent
of yamlAlias:
result = left.aliasTarget == right.aliasTarget
@ -41,7 +40,6 @@ proc `$`*(event: YamlStreamEvent): string =
result &= "tag=" & $event.scalarTag
if event.scalarAnchor != yAnchorNone:
result &= ", anchor=" & $event.scalarAnchor
result &= ", typeHint=" & $event.scalarType
result &= ", content=\"" & event.scalarContent & '\"'
of yamlAlias:
result &= "aliasTarget=" & $event.aliasTarget
@ -69,8 +67,6 @@ proc endSeqEvent*(): YamlStreamEvent =
result = YamlStreamEvent(kind: yamlEndSequence)
proc scalarEvent*(content: string = "", tag: TagId = yTagQuestionMark,
anchor: AnchorId = yAnchorNone,
typeHint: YamlTypeHint = yTypeUnknown): YamlStreamEvent =
anchor: AnchorId = yAnchorNone): YamlStreamEvent =
result = YamlStreamEvent(kind: yamlScalar, scalarTag: tag,
scalarAnchor: anchor, scalarContent: content,
scalarType: typeHint)
scalarAnchor: anchor, scalarContent: content)

172
private/hints.nim Normal file
View File

@ -0,0 +1,172 @@
type
  YamlTypeHintState = enum
    ## States of the automaton driven by ``advanceTypeHint``. A state is named
    ## after the characters consumed so far. The ``Lower`` variants are
    ## entered when a lowercase letter follows the first character, so that
    ## the remaining letters have to be lowercase as well.
    ythInitial,
    # "false"
    ythF, ythFA, ythFAL, ythFALS, ythFALSE,
    # "n" / "null"
    ythN, ythNU, ythNUL, ythNULL,
    # "no"
    ythNO,
    # "on"
    ythO, ythON,
    # "off"
    ythOF, ythOFF,
    # "true"
    ythT, ythTR, ythTRU, ythTRUE,
    # "y" / "yes"
    ythY, ythYE, ythYES,
    # ".inf"
    ythPoint, ythPointI, ythPointIN, ythPointINF,
    # ".nan"
    ythPointN, ythPointNA, ythPointNAN,
    # lowercase continuations of the words above
    ythLowerFA, ythLowerFAL, ythLowerFALS,
    ythLowerNU, ythLowerNUL,
    ythLowerOF,
    ythLowerTR, ythLowerTRU,
    ythLowerYE,
    ythPointLowerIN, ythPointLowerN, ythPointLowerNA,
    # numbers: sign, leading zero, digits, fraction, exponent marker,
    # exponent sign, exponent digits
    ythMinus, yth0, ythInt, ythDecimal, ythNumE, ythNumEPlusMinus,
    ythExponent,
    # not the target of any rule in ``advanceTypeHint``
    ythNone
macro typeHintStateMachine(c: untyped, content: untyped): stmt =
  ## Expands a compact transition table into nested ``case`` statements.
  ##
  ## ``c`` is the expression yielding the current input character.
  ## ``content`` is a list of ``of`` branches. Each branch names one or more
  ## characters and contains rules of the form ``state => newState`` or
  ## ``[stateA, stateB] => newState``. A rule whose right side is ``nil``
  ## accepts the character without leaving the current state.
  ##
  ## The generated code is ``case c`` with one branch per character set, each
  ## holding a ``case typeHintState`` with one branch per rule. The variable
  ## ``typeHintState`` is referenced by name and must exist at the call site.
  ##
  ## A character without a branch, or a state without a rule for the current
  ## character, makes the enclosing routine ``return yTypeUnknown``. The
  ## automaton only knows that none of its patterns can match any more; it
  ## cannot tell that the scalar is a string. This is the same value
  ## ``guessType`` yields when the input ends in a non-accepting state.
  assert content.kind == nnkStmtList
  result = newNimNode(nnkCaseStmt, content).add(copyNimNode(c))
  for branch in content.children:
    assert branch.kind == nnkOfBranch
    var
      charBranch = newNimNode(nnkOfBranch, branch)
      i = 0
      stateBranches = newNimNode(nnkCaseStmt, branch).add(
          newIdentNode("typeHintState"))
    # every child before the statement list is a character (range) literal
    while branch[i].kind != nnkStmtList:
      charBranch.add(copyNimTree(branch[i]))
      inc(i)
    for rule in branch[i].children:
      assert rule.kind == nnkInfix
      assert ($rule[0].ident == "=>")
      var stateBranch = newNimNode(nnkOfBranch, rule)
      # left side: a single state or a bracketed list of states
      case rule[1].kind
      of nnkBracket:
        for item in rule[1].children:
          stateBranch.add(item)
      of nnkIdent:
        stateBranch.add(rule[1])
      else:
        assert false
      # right side: ``nil`` keeps the state, anything else is assigned
      if rule[2].kind == nnkNilLit:
        stateBranch.add(newStmtList(newNimNode(nnkDiscardStmt).add(
            newEmptyNode())))
      else:
        stateBranch.add(newStmtList(newAssignment(
            newIdentNode("typeHintState"), copyNimTree(rule[2]))))
      stateBranches.add(stateBranch)
    # no rule for the current state: the scalar matches no pattern
    stateBranches.add(newNimNode(nnkElse).add(newStmtList(
        newNimNode(nnkReturnStmt).add(newIdentNode("yTypeUnknown")))))
    charBranch.add(newStmtList(stateBranches))
    result.add(charBranch)
  # character that occurs in no pattern at all
  result.add(newNimNode(nnkElse).add(newStmtList(
      newNimNode(nnkReturnStmt).add(newIdentNode("yTypeUnknown")))))
# Feeds one character into the type hint automaton.
#
# This is a dirty template: it reads and assigns the variable
# ``typeHintState`` of the calling scope. When the character is not allowed in
# the current state, the code generated by ``typeHintStateMachine`` returns
# from the enclosing routine.
#
# Each rule reads ``state(s) => next state``; ``=> nil`` accepts the character
# and stays in the current state. The '0' branch carries the same
# ``[ythInt, ythDecimal, ythExponent] => nil`` rule as the '1'..'9' branch.
# Without it, every number with a zero after its first digit ("10", "1.0",
# "1e10") is rejected. ``yth0`` is left out of that rule in both branches, so
# a leading zero still may not be followed by further digits.
template advanceTypeHint(ch: char) {.dirty.} =
  typeHintStateMachine ch:
  of '~':
    ythInitial => ythNULL
  of '.':
    [yth0, ythInt] => ythDecimal
    [ythInitial, ythMinus] => ythPoint
  of '+':
    ythNumE => ythNumEPlusMinus
  of '-':
    ythInitial => ythMinus
    ythNumE => ythNumEPlusMinus
  of '0':
    [ythInitial, ythMinus] => yth0
    [ythNumE, ythNumEPlusMinus] => ythExponent
    [ythInt, ythDecimal, ythExponent] => nil
  of '1'..'9':
    [ythInitial, ythMinus] => ythInt
    [ythNumE, ythNumEPlusMinus] => ythExponent
    [ythInt, ythDecimal, ythExponent] => nil
  of 'a':
    ythF => ythLowerFA
    ythPointN => ythPointNA
    ythPointLowerN => ythPointLowerNA
  of 'A':
    ythF => ythFA
    ythPointN => ythPointNA
  of 'e':
    [yth0, ythInt, ythDecimal] => ythNumE
    ythLowerFALS => ythFALSE
    ythLowerTRU => ythTRUE
    ythY => ythLowerYE
  of 'E':
    [yth0, ythInt, ythDecimal] => ythNumE
    ythFALS => ythFALSE
    ythTRU => ythTRUE
    ythY => ythYE
  of 'f':
    ythInitial => ythF
    ythO => ythLowerOF
    ythLowerOF => ythOFF
    ythPointLowerIN => ythPointINF
  of 'F':
    ythInitial => ythF
    ythO => ythOF
    ythOF => ythOFF
    ythPointIN => ythPointINF
  of 'i', 'I':
    ythPoint => ythPointI
  of 'l':
    ythLowerNU => ythLowerNUL
    ythLowerNUL => ythNULL
    ythLowerFA => ythLowerFAL
  of 'L':
    ythNU => ythNUL
    ythNUL => ythNULL
    ythFA => ythFAL
  of 'n':
    ythInitial => ythN
    ythO => ythON
    ythPoint => ythPointLowerN
    ythPointI => ythPointLowerIN
    ythPointLowerNA => ythPointNAN
  of 'N':
    ythInitial => ythN
    ythO => ythON
    ythPoint => ythPointN
    ythPointI => ythPointIN
    ythPointNA => ythPointNAN
  of 'o', 'O':
    ythInitial => ythO
    ythN => ythNO
  of 'r':
    ythT => ythLowerTR
  of 'R':
    ythT => ythTR
  of 's':
    ythLowerFAL => ythLowerFALS
    ythLowerYE => ythYES
  of 'S':
    ythFAL => ythFALS
    ythYE => ythYES
  of 't', 'T':
    ythInitial => ythT
  of 'u':
    ythN => ythLowerNU
    ythLowerTR => ythLowerTRU
  of 'U':
    ythN => ythNU
    ythTR => ythTRU
  of 'y', 'Y':
    ythInitial => ythY
proc guessType*(scalar: string): TypeHint =
  ## Guesses which type the content of a scalar represents.
  ##
  ## Every character of ``scalar`` is fed into the automaton defined by
  ## ``advanceTypeHint``. That template returns from this proc on its own as
  ## soon as a character cannot continue any known pattern. If the whole
  ## input is consumed, the state reached decides the hint; a state that
  ## does not complete a pattern (including the initial state for an empty
  ## ``scalar``) gives ``yTypeUnknown``.
  # The name ``typeHintState`` is fixed: the dirty template refers to it.
  var typeHintState: YamlTypeHintState = ythInitial
  for symbol in scalar:
    advanceTypeHint(symbol)
  result =
    case typeHintState
    of ythNULL: yTypeNull
    of ythTRUE, ythON, ythYES, ythY: yTypeBoolTrue
    of ythFALSE, ythOFF, ythNO, ythN: yTypeBoolFalse
    of ythInt, yth0: yTypeInteger
    of ythDecimal, ythExponent: yTypeFloat
    of ythPointINF: yTypeFloatInf
    of ythPointNAN: yTypeFloatNaN
    else: yTypeUnknown

View File

@ -9,18 +9,17 @@ type
proc initLevel(node: JsonNode): Level = (node: node, key: cast[string](nil))
proc jsonFromScalar(content: string, tag: TagId,
typeHint: YamlTypeHint): JsonNode =
proc jsonFromScalar(content: string, tag: TagId): JsonNode =
new(result)
var mappedType: YamlTypeHint
var mappedType: TypeHint
case tag
of yTagQuestionMark:
mappedType = typeHint
mappedType = guessType(content)
of yTagExclamationMark, yTagString:
mappedType = yTypeString
of yTagBoolean:
case typeHint
case guessType(content)
of yTypeBoolTrue:
mappedType = yTypeBoolTrue
of yTypeBoolFalse:
@ -90,15 +89,13 @@ proc constructJson*(s: YamlStream): seq[JsonNode] =
if levels.len == 0:
# parser ensures that next event will be yamlEndDocument
levels.add((node: jsonFromScalar(event.scalarContent,
event.scalarTag,
event.scalarType), key: nil))
event.scalarTag), key: nil))
continue
case levels[levels.high].node.kind
of JArray:
let jsonScalar = jsonFromScalar(event.scalarContent,
event.scalarTag,
event.scalarType)
event.scalarTag)
levels[levels.high].node.elems.add(jsonScalar)
if event.scalarAnchor != yAnchorNone:
anchors[event.scalarAnchor] = jsonScalar
@ -111,8 +108,7 @@ proc constructJson*(s: YamlStream): seq[JsonNode] =
"scalar keys may not have anchors in JSON")
else:
let jsonScalar = jsonFromScalar(event.scalarContent,
event.scalarTag,
event.scalarType)
event.scalarTag)
levels[levels.high].node.fields.add(
(key: levels[levels.high].key, val: jsonScalar))
levels[levels.high].key = nil

View File

@ -47,9 +47,9 @@ type
ylDirective, ylDefineTagHandle, ylDefineTagURI, ylMajorVersion,
ylMinorVersion, ylUnknownDirectiveParam, ylDirectiveComment,
# scalar reading states
ylPlainScalar, ylPlainScalarNone, ylSingleQuotedScalar,
ylDoublyQuotedScalar, ylEscape, ylBlockScalar, ylBlockScalarHeader,
ylSpaceAfterPlainScalar, ylSpaceAfterQuotedScalar,
ylPlainScalar, ylSingleQuotedScalar, ylDoublyQuotedScalar, ylEscape,
ylBlockScalar, ylBlockScalarHeader, ylSpaceAfterPlainScalar,
ylSpaceAfterQuotedScalar,
# indentation
ylIndentation,
# comments
@ -61,30 +61,6 @@ type
# anchoring
ylAnchor, ylAlias
YamlTypeHintState = enum
ythInitial,
ythF, ythFA, ythFAL, ythFALS, ythFALSE,
ythN, ythNU, ythNUL, ythNULL,
ythNO,
ythO, ythON,
ythOF, ythOFF,
ythT, ythTR, ythTRU, ythTRUE,
ythY, ythYE, ythYES,
ythPoint, ythPointI, ythPointIN, ythPointINF,
ythPointN, ythPointNA, ythPointNAN,
ythLowerFA, ythLowerFAL, ythLowerFALS,
ythLowerNU, ythLowerNUL,
ythLowerOF,
ythLowerTR, ythLowerTRU,
ythLowerYE,
ythPointLowerIN, ythPointLowerN, ythPointLowerNA,
ythMinus, yth0, ythInt, ythDecimal, ythNumE, ythNumEPlusMinus,
ythExponent, ythNone
YamlLexer = object of BaseLexer
indentations: seq[int]
encoding: Encoding
@ -92,7 +68,6 @@ type
charoffset: int
content*: string # my.content of the last returned token.
line*, column*: int
typeHint*: YamlTypeHint
const
UTF8NextLine = toUTF8(Rune(0x85))
@ -177,24 +152,6 @@ template yieldToken(kind: YamlLexerToken) {.dirty.} =
my.content = ""
template yieldScalarPart() {.dirty.} =
case typeHintState
of ythNULL:
my.typeHint = yTypeNull
of ythTRUE, ythON, ythYES, ythY:
my.typeHint = yTypeBoolTrue
of ythFALSE, ythOFF, ythNO, ythN:
my.typeHint = yTypeBoolFalse
of ythInt, yth0:
my.typeHint = yTypeInteger
of ythDecimal, ythExponent:
my.typeHint = yTypeFloat
of ythPointINF:
my.typeHint = yTypeFloatInf
of ythPointNAN:
my.typeHint = yTypeFloatNaN
else:
my.typeHint = yTypeUnknown
when defined(yamlDebug):
echo "Lexer token: tScalarPart(\"", my.content, "\".", typeHintState,
")"
@ -224,135 +181,6 @@ template handleLF() {.dirty.} =
template `or`(r: Rune, i: int): Rune =
cast[Rune](cast[int](r) or i)
macro typeHintStateMachine(c: untyped, content: untyped): stmt =
assert content.kind == nnkStmtList
result = newNimNode(nnkCaseStmt, content).add(copyNimNode(c))
for branch in content.children:
assert branch.kind == nnkOfBranch
var
charBranch = newNimNode(nnkOfBranch, branch)
i = 0
stateBranches = newNimNode(nnkCaseStmt, branch).add(
newIdentNode("typeHintState"))
while branch[i].kind != nnkStmtList:
charBranch.add(copyNimTree(branch[i]))
inc(i)
for rule in branch[i].children:
assert rule.kind == nnkInfix
assert ($rule[0].ident == "=>")
var stateBranch = newNimNode(nnkOfBranch, rule)
case rule[1].kind
of nnkBracket:
for item in rule[1].children:
stateBranch.add(item)
of nnkIdent:
stateBranch.add(rule[1])
else:
assert false
if rule[2].kind == nnkNilLit:
stateBranch.add(newStmtList(newNimNode(nnkDiscardStmt).add(
newEmptyNode())))
else:
stateBranch.add(newStmtList(newAssignment(
newIdentNode("typeHintState"), copyNimTree(rule[2]))))
stateBranches.add(stateBranch)
stateBranches.add(newNimNode(nnkElse).add(newStmtList(newAssignment(
newIdentNode("typeHintState"), newIdentNode("ythNone")),
newAssignment(newIdentNode("state"),
newIdentNode("ylPlainScalarNone")))))
charBranch.add(newStmtList(stateBranches))
result.add(charBranch)
result.add(newNimNode(nnkElse).add(newStmtList(newAssignment(
newIdentNode("typeHintState"), newIdentNode("ythNone")),
newAssignment(newIdentNode("state"),
newIdentNode("ylPlainScalarNone")))))
template advanceTypeHint(ch: char) {.dirty.} =
typeHintStateMachine ch:
of '~':
ythInitial => ythNULL
of '.':
[yth0, ythInt] => ythDecimal
[ythInitial, ythMinus] => ythPoint
of '+': ythNumE => ythNumEPlusMinus
of '-':
ythInitial => ythMinus
ythNumE => ythNumEPlusMinus
of '0':
[ythInitial, ythMinus] => yth0
[ythNumE, ythNumEPlusMinus] => ythExponent
of '1'..'9':
[ythInitial, ythMinus] => ythInt
[ythNumE, ythNumEPlusMinus] => ythExponent
[ythInt, ythDecimal, ythExponent] => nil
of 'a':
ythF => ythLowerFA
ythPointN => ythPointNA
ythPointLowerN => ythPointLowerNA
of 'A':
ythF => ythFA
ythPointN => ythPointNA
of 'e':
[yth0, ythInt, ythDecimal] => ythNumE
ythLowerFALS => ythFALSE
ythLowerTRU => ythTRUE
ythY => ythLowerYE
of 'E':
[yth0, ythInt, ythDecimal] => ythNumE
ythFALS => ythFALSE
ythTRU => ythTRUE
ythY => ythYE
of 'f':
ythInitial => ythF
ythO => ythLowerOF
ythLowerOF => ythOFF
ythPointLowerIN => ythPointINF
of 'F':
ythInitial => ythF
ythO => ythOF
ythOF => ythOFF
ythPointIN => ythPointINF
of 'i', 'I': ythPoint => ythPointI
of 'l':
ythLowerNU => ythLowerNUL
ythLowerNUL => ythNULL
ythLowerFA => ythLowerFAL
of 'L':
ythNU => ythNUL
ythNUL => ythNULL
ythFA => ythFAL
of 'n':
ythInitial => ythN
ythO => ythON
ythPoint => ythPointLowerN
ythPointI => ythPointLowerIN
ythPointLowerNA => ythPointNAN
of 'N':
ythInitial => ythN
ythO => ythON
ythPoint => ythPointN
ythPointI => ythPointIN
ythPointNA => ythPointNAN
of 'o', 'O':
ythInitial => ythO
ythN => ythNO
of 'r': ythT => ythLowerTR
of 'R': ythT => ythTR
of 's':
ythLowerFAL => ythLowerFALS
ythLowerYE => ythYES
of 'S':
ythFAL => ythFALS
ythYE => ythYES
of 't', 'T': ythInitial => ythT
of 'u':
ythN => ythLowerNU
ythLowerTR => ythLowerTRU
of 'U':
ythN => ythNU
ythTR => ythTRU
of 'y', 'Y': ythInitial => ythY
iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
var
# the following three values are used for parsing escaped unicode chars
@ -370,7 +198,6 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
# Lexer must know whether it parses block or flow style. Therefore,
# it counts the number of open flow arrays / maps here
state: YamlLexerState = ylInitial # lexer state
typeHintState: YamlTypeHintState = ythInitial
# for giving type hints of plain scalars
lastIndentationLength = 0
# after parsing the indentation of the line, this will hold the
@ -444,20 +271,14 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
yieldToken(tLineStart)
my.content = tmp
my.column = curPos
state = ylPlainScalarNone
typeHintState = ythNone
state = ylPlainScalar
continue
else:
let tmp = my.content
my.content = ""
yieldToken(tLineStart)
my.content = tmp
if my.content.len == 1:
typeHintState = ythMinus
state = ylPlainScalar
else:
typeHintState = ythNone
state = ylPlainScalarNone
state = ylPlainScalar
continue
of ylDots:
case c
@ -469,12 +290,10 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
yieldToken(tDocumentEnd)
state = ylDirectiveLineEnd
else:
state = ylPlainScalarNone
typeHintState = ythNone
state = ylPlainScalar
continue
else:
state = ylPlainScalarNone
typeHintState = ythNone
state = ylPlainScalar
continue
of ylDirectiveLineEnd:
case c
@ -643,41 +462,14 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
state = ylSpaceAfterPlainScalar
else:
my.content.add(c)
state = ylPlainScalarNone
typeHintState = ythNone
of '[', ']', '{', '}':
yieldScalarPart()
state = ylInitialInLine
continue
else:
advanceTypeHint(c)
my.content.add(c)
of ylPlainScalarNone:
case c
of EndOfFile, '\r', '\x0A':
yieldScalarPart()
state = ylLineEnd
continue
of ':':
lastSpecialChar = c
state = ylSpaceAfterPlainScalar
of ' ':
state = ylSpaceAfterPlainScalar
continue
of ',':
if flowDepth > 0:
lastSpecialChar = c
state = ylSpaceAfterPlainScalar
else:
my.content.add(c)
state = ylPlainScalar
of '[', ']', '{', '}':
yieldScalarPart()
state = ylInitialInLine
continue
else:
my.content.add(c)
of ylSpaceAfterPlainScalar:
if lastSpecialChar != '\0':
case c
@ -689,8 +481,7 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content.add(lastSpecialChar)
lastSpecialChar = '\0'
trailingSpace = ""
state = ylPlainScalarNone
typeHintState = ythNone
state = ylPlainScalar
continue
case c
@ -708,8 +499,7 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content.add(trailingSpace)
my.content.add(c)
trailingSpace = ""
state = ylPlainScalarNone
typeHintState = ythNone
state = ylPlainScalar
of ':', '#':
lastSpecialChar = c
of '[', ']', '{', '}':
@ -721,8 +511,7 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content.add(trailingSpace)
my.content.add(c)
trailingSpace = ""
state = ylPlainScalarNone
typeHintState = ythNone
state = ylPlainScalar
of ylInitialInLine:
if lastSpecialChar != '\0':
@ -763,11 +552,9 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
my.column = curPos - 1
else:
my.content.add(lastSpecialChar)
advanceTypeHint(lastSpecialChar)
lastSpecialChar = '\0'
my.column = curPos - 1
state = ylPlainScalar
typeHintState = ythInitial
continue
case c
of '\r', '\x0A', EndOfFile:
@ -780,8 +567,6 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content = "" & c
my.column = curPos
state = ylPlainScalar
typeHintState = ythInitial
advanceTypeHint(c)
of '[':
inc(flowDepth)
yieldToken(tOpeningBracket)
@ -822,8 +607,6 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content = "" & c
my.column = curPos
state = ylPlainScalar
typeHintState = ythInitial
advanceTypeHint(c)
of '?', ':':
my.column = curPos
lastSpecialChar = c
@ -839,8 +622,6 @@ iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content = "" & c
my.column = curPos
state = ylPlainScalar
typeHintState = ythInitial
advanceTypeHint(c)
of ylComment, ylDirectiveComment:
case c
of EndOfFile, '\r', '\x0A':

View File

@ -112,11 +112,10 @@ proc resolveTag(parser: YamlSequentialParser, tag: var string,
result = parser.tagLib.registerUri(tag)
tag = ""
template yieldScalar(content: string, typeHint: YamlTypeHint,
quoted: bool = false) {.dirty.} =
template yieldScalar(content: string, quoted: bool = false) {.dirty.} =
when defined(yamlDebug):
echo "Parser token [mode=", level.mode, ", state=", state, "]: ",
"scalar[\"", content, "\", type=", typeHint, "]"
"scalar[\"", content, "\"]"
if objectTag.len > 0:
if tag.len > 0:
raiseError("Duplicate tag for scalar (tag=" & tag & ", objectTag=" &
@ -124,7 +123,7 @@ template yieldScalar(content: string, typeHint: YamlTypeHint,
tag = objectTag
objectTag = ""
var e = scalarEvent(nil, resolveTag(parser, tag, quoted),
resolveAnchor(parser, anchor), typeHint)
resolveAnchor(parser, anchor))
shallowCopy(e.scalarContent, content)
yield e
@ -158,7 +157,7 @@ template yieldDocumentEnd() {.dirty.} =
template closeLevel(lvl: DocumentLevel) {.dirty.} =
case lvl.mode
of mExplicitBlockMapKey, mFlowMapKey:
yieldScalar("", yTypeUnknown)
yieldScalar("")
yield YamlStreamEvent(kind: yamlEndMap)
of mImplicitBlockMapKey, mBlockMapValue, mFlowMapValue:
yield YamlStreamEvent(kind: yamlEndMap)
@ -167,10 +166,10 @@ template closeLevel(lvl: DocumentLevel) {.dirty.} =
of mScalar:
when defined(yamlDebug):
echo "Parser token [mode=", level.mode, ", state=", state, "]: ",
"scalar[\"", scalarCache, "\", type=", scalarCacheType, "]"
yieldScalar(scalarCache, scalarCacheType)
"scalar[\"", scalarCache, "\"]"
yieldScalar(scalarCache)
else:
yieldScalar("", yTypeUnknown)
yieldScalar("")
proc mustLeaveLevel(curCol: int, ancestry: seq[DocumentLevel]): bool =
if ancestry.len == 0:
@ -216,7 +215,7 @@ template handleBlockIndicator(expected, possible: openarray[DocumentLevelMode],
# `in` does not work if possible is [], so we have to check for that
when possible.len > 0:
if level.mode in possible:
yieldScalar("", yTypeUnknown)
yieldScalar("")
level.mode = next
ancestry.add(level)
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
@ -241,7 +240,7 @@ template handleBlockIndicator(expected, possible: openarray[DocumentLevelMode],
anchor = cachedAnchor
tag = cachedTag
objectTag = ""
yieldScalar("", yTypeUnknown)
yieldScalar("")
else:
tag = objectTag
objectTag = ""
@ -254,7 +253,6 @@ template startPlainScalar() {.dirty.} =
level.mode = mScalar
level.indentationColumn = lex.column
scalarCache = lex.content
scalarCacheType = lex.typeHint
scalarIndentation = lex.column
state = ypBlockAfterScalar
@ -301,7 +299,6 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
objectTag: string = ""
anchor: string = ""
scalarCache: string = nil
scalarCacheType: YamlTypeHint
scalarIndentation: int
scalarCacheIsQuoted: bool = false
aliasCache = yAnchorNone
@ -428,17 +425,15 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
level.indentationColumn = lex.column
of mImplicitBlockMapKey:
scalarCache = lex.content
scalarCacheType = lex.typeHint
scalarCacheIsQuoted = false
scalarIndentation = lex.column
of mBlockMapValue:
scalarCache = lex.content
scalarCacheType = lex.typeHint
scalarCacheIsQuoted = false
scalarIndentation = lex.column
level.mode = mImplicitBlockMapKey
of mExplicitBlockMapKey:
yieldScalar("", yTypeUnknown)
yieldScalar("")
level.mode = mBlockMapValue
continue
else:
@ -449,7 +444,6 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
case level.mode
of mUnknown, mImplicitBlockMapKey:
scalarCache = lex.content
scalarCacheType = yTypeString
scalarCacheIsQuoted = true
scalarIndentation = lex.column
state = ypBlockAfterScalar
@ -495,7 +489,6 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
state = ypBlockLineStart
continue
scalarCache &= " " & lex.content
scalarCacheType = yTypeUnknown
state = ypBlockLineEnd
of tLineStart:
discard
@ -526,15 +519,14 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
ancestry.add(level)
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1)
yieldScalar(scalarCache, scalarCacheType, scalarCacheIsQuoted)
yieldScalar(scalarCache, scalarCacheIsQuoted)
scalarCache = nil
state = ypBlockAfterColon
of tLineStart:
if level.mode == mImplicitBlockMapKey:
raiseError("Missing colon after implicit map key")
if level.mode != mScalar:
yieldScalar(scalarCache, scalarCacheType,
scalarCacheIsQuoted)
yieldScalar(scalarCache, scalarCacheIsQuoted)
scalarCache = nil
if ancestry.len > 0:
level = ancestry.pop()
@ -544,7 +536,7 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
else:
state = ypBlockMultilineScalar
of tStreamEnd:
yieldScalar(scalarCache, scalarCacheType, scalarCacheIsQuoted)
yieldScalar(scalarCache, scalarCacheIsQuoted)
scalarCache = nil
if ancestry.len > 0:
level = ancestry.pop()
@ -655,7 +647,7 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
of ypBlockAfterColon:
case token
of tScalar:
yieldScalar(lex.content, yTypeUnknown, true)
yieldScalar(lex.content, true)
level = ancestry.pop()
assert level.mode == mBlockMapValue
level.mode = mImplicitBlockMapKey
@ -793,15 +785,15 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
of tLineStart:
discard
of tScalar:
yieldScalar(lex.content, yTypeUnknown, true)
yieldScalar(lex.content, true)
level = ancestry.pop()
state = ypFlowAfterObject
of tScalarPart:
yieldScalar(lex.content, lex.typeHint)
yieldScalar(lex.content)
level = ancestry.pop()
state = ypFlowAfterObject
of tColon:
yieldScalar("", yTypeUnknown)
yieldScalar("")
level = ancestry.pop()
if level.mode == mFlowMapKey:
level.mode = mFlowMapValue
@ -813,7 +805,7 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
of tQuestionMark:
state = ypFlowAfterQuestionMark
of tComma:
yieldScalar("", yTypeUnknown)
yieldScalar("")
level = ancestry.pop()
case level.mode
of mFlowMapValue:
@ -822,7 +814,7 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1)
of mFlowSequenceItem:
yieldScalar("", yTypeUnknown)
yieldScalar("")
else:
raiseError("Internal error! Please report this bug.")
of tOpeningBrace:
@ -851,7 +843,7 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
indentationColumn: -1)
of tClosingBrace:
if level.mode == mUnknown:
yieldScalar("", yTypeUnknown)
yieldScalar("")
level = ancestry.pop()
if level.mode != mFlowMapKey:
yieldUnexpectedToken($level.mode)
@ -867,7 +859,7 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
state = ypExpectingDocumentEnd
of tClosingBracket:
if level.mode == mUnknown:
yieldScalar("", yTypeUnknown)
yieldScalar("")
level = ancestry.pop()
if level.mode != mFlowSequenceItem:
yieldUnexpectedToken()

View File

@ -198,17 +198,18 @@ proc present*(s: YamlStream, target: Stream, tagLib: YamlTagLibrary,
item.scalarTag, tagLib, item.scalarAnchor)
if style == ypsJson:
let hint = guessType(item.scalarContent)
if item.scalarTag in [yTagQuestionMark, yTagBoolean] and
item.scalarType in [yTypeBoolTrue, yTypeBoolFalse]:
if item.scalarType == yTypeBoolTrue:
hint in [yTypeBoolTrue, yTypeBoolFalse]:
if hint == yTypeBoolTrue:
safeWrite("true")
else:
safeWrite("false")
elif item.scalarTag in [yTagQuestionMark, yTagNull] and
item.scalarType == yTypeNull:
hint == yTypeNull:
safeWrite("null")
elif item.scalarTag in [yTagQuestionMark, yTagFloat] and
item.scalarType in [yTypeFloatInf, yTypeFloatNaN]:
hint in [yTypeFloatInf, yTypeFloatNaN]:
raise newException(YamlPresenterJsonError,
"Infinity and not-a-number values cannot be presented as JSON!")
else:
@ -469,7 +470,7 @@ proc transform*(input: Stream, output: Stream, style: YamlPresentationStyle,
event.seqTag = yTagSequence
of yamlScalar:
if event.scalarTag == yTagQuestionMark:
case event.scalarType
case guessType(event.scalarContent)
of yTypeInteger:
event.scalarTag = yTagInteger
of yTypeFloat, yTypeFloatInf, yTypeFloatNaN:

View File

@ -36,12 +36,6 @@ template ensure(input: string, expected: openarray[BasicLexerToken]) =
expected[i].content, "\", got \"", lex.content, "\")"
fail()
break
if token == tScalarPart:
if lex.typeHint != expected[i].typeHint:
echo "wrong type hint (expected ", expected[i].typeHint,
", got ", lex.typeHint, ")"
fail()
break
inc(i)
if i < expected.len:
echo "received less tokens than expected (first missing = ",

View File

@ -8,19 +8,9 @@ proc startDoc(): YamlStreamEvent =
proc endDoc(): YamlStreamEvent =
result.kind = yamlEndDocument
proc scalar(content: string, typeHint: YamlTypeHint,
tag: TagId = yTagQuestionMark, anchor: AnchorId = yAnchorNone):
YamlStreamEvent =
result.kind = yamlScalar
result.scalarAnchor = anchor
result.scalarTag = tag
result.scalarContent = content
result.scalarType = typeHint
proc scalar(content: string,
tag: TagId = yTagQuestionMark, anchor: AnchorId = yAnchorNone):
YamlStreamEvent =
result = scalar(content, yTypeUnknown, tag, anchor)
YamlStreamEvent = scalarEvent(content, tag, anchor)
proc startSequence(tag: TagId = yTagQuestionMark,
anchor: AnchorId = yAnchorNone):
@ -72,9 +62,6 @@ proc printDifference(expected, actual: YamlStreamEvent) =
", got ",
cast[int](actual.scalarContent[i]), ")"
break
elif expected.scalarType != actual.scalarType:
echo "[scalar] expected type hint ", expected.scalarType,
", got ", actual.scalarType
else:
echo "[scalar] Unknown difference"
of yamlStartMap:
@ -130,23 +117,23 @@ suite "Parsing":
ensure("Scalar", startDoc(), scalar("Scalar"), endDoc())
test "Parsing: Simple Sequence":
ensure("- off", startDoc(), startSequence(),
scalar("off", yTypeBoolFalse), endSequence(), endDoc())
scalar("off"), endSequence(), endDoc())
test "Parsing: Simple Map":
ensure("42: value\nkey2: -7.5", startDoc(), startMap(),
scalar("42", yTypeInteger), scalar("value"), scalar("key2"),
scalar("-7.5", yTypeFloat), endMap(), endDoc())
scalar("42"), scalar("value"), scalar("key2"),
scalar("-7.5"), endMap(), endDoc())
test "Parsing: Explicit Map":
ensure("? null\n: value\n? ON\n: value2", startDoc(), startMap(),
scalar("null", yTypeNull), scalar("value"),
scalar("ON", yTypeBoolTrue), scalar("value2"),
scalar("null"), scalar("value"),
scalar("ON"), scalar("value2"),
endMap(), endDoc())
test "Parsing: Mixed Map (explicit to implicit)":
ensure("? a\n: 13\n1.5: d", startDoc(), startMap(), scalar("a"),
scalar("13", yTypeInteger), scalar("1.5", yTypeFloat),
scalar("13"), scalar("1.5"),
scalar("d"), endMap(), endDoc())
test "Parsing: Mixed Map (implicit to explicit)":
ensure("a: 4.2\n? 23\n: d", startDoc(), startMap(), scalar("a"),
scalar("4.2", yTypeFloat), scalar("23", yTypeInteger),
scalar("4.2"), scalar("23"),
scalar("d"), endMap(), endDoc())
test "Parsing: Missing values in map":
ensure("? a\n? b\nc:", startDoc(), startMap(), scalar("a"), scalar(""),
@ -174,11 +161,11 @@ suite "Parsing":
startSequence(), scalar("l1_i1"), scalar("l1_i2"), endSequence(),
scalar("l2_i1"), endSequence(), endDoc())
test "Parsing: Flow Sequence":
ensure("[2, b]", startDoc(), startSequence(), scalar("2", yTypeInteger),
ensure("[2, b]", startDoc(), startSequence(), scalar("2"),
scalar("b"), endSequence(), endDoc())
test "Parsing: Flow Map":
ensure("{a: Y, 1.337: d}", startDoc(), startMap(), scalar("a"),
scalar("Y", yTypeBoolTrue), scalar("1.337", yTypeFloat),
scalar("Y"), scalar("1.337"),
scalar("d"), endMap(), endDoc())
test "Parsing: Flow Sequence in Flow Sequence":
ensure("[a, [b, c]]", startDoc(), startSequence(), scalar("a"),
@ -217,7 +204,7 @@ suite "Parsing":
scalar("a"), scalar("ab"), endMap(), endDoc())
test "Parsing: non-specific tags of quoted strings":
ensure("\"a\"", startDoc(),
scalar("a", yTypeString, yTagExclamationMark), endDoc())
scalar("a", yTagExclamationMark), endDoc())
test "Parsing: explicit non-specific tag":
ensure("! a", startDoc(), scalar("a", yTagExclamationMark), endDoc())
test "Parsing: secondary tag handle resolution":

View File

@ -21,7 +21,7 @@ import streams, unicode, lexbase, tables, strutils, json, hashes, queues, macros
export streams, tables, json
type
YamlTypeHint* = enum
TypeHint* = enum
## A type hint is a friendly message from ``guessType``, telling you
## it thinks a scalar string probably is of a certain type. You are not
## required to adhere to this information. The first matching RegEx will
@ -81,9 +81,6 @@ type
## specification. These are by convention mapped to the ``TagId`` s
## ``yTagQuestionMark`` and ``yTagExclamationMark`` respectively.
## Mapping is done by a `YamlTagLibrary <#YamlTagLibrary>`_.
##
## The value ``scalarType`` is a hint from the lexer, see
## `YamlTypeHint <#YamlTypeHint>`_.
case kind*: YamlStreamEventKind
of yamlStartMap:
mapAnchor* : AnchorId
@ -95,7 +92,6 @@ type
scalarAnchor* : AnchorId
scalarTag* : TagId
scalarContent*: string # may not be nil (but empty)
scalarType* : YamlTypeHint
of yamlEndMap, yamlEndSequence, yamlStartDocument, yamlEndDocument:
discard
of yamlAlias:
@ -300,18 +296,19 @@ proc `==`*(left: YamlStreamEvent, right: YamlStreamEvent): bool
proc `$`*(event: YamlStreamEvent): string
## outputs a human-readable string describing the given event
proc startDocEvent*(): YamlStreamEvent {.inline.}
proc endDocEvent*(): YamlStreamEvent {.inline.}
proc startDocEvent*(): YamlStreamEvent {.inline, raises: [].}
proc endDocEvent*(): YamlStreamEvent {.inline, raises: [].}
proc startMapEvent*(tag: TagId = yTagQuestionMark,
anchor: AnchorId = yAnchorNone): YamlStreamEvent {.inline.}
proc endMapEvent*(): YamlStreamEvent {.inline.}
anchor: AnchorId = yAnchorNone):
YamlStreamEvent {.inline, raises: [].}
proc endMapEvent*(): YamlStreamEvent {.inline, raises: [].}
proc startSeqEvent*(tag: TagId = yTagQuestionMark,
anchor: AnchorId = yAnchorNone): YamlStreamEvent {.inline.}
proc endSeqEvent*(): YamlStreamEvent {.inline.}
anchor: AnchorId = yAnchorNone):
YamlStreamEvent {.inline, raises: [].}
proc endSeqEvent*(): YamlStreamEvent {.inline, raises: [].}
proc scalarEvent*(content: string = "", tag: TagId = yTagQuestionMark,
anchor: AnchorId = yAnchorNone,
typeHint: YamlTypeHint = yTypeUnknown):
YamlStreamEvent {.inline.}
anchor: AnchorId = yAnchorNone):
YamlStreamEvent {.inline, raises: [].}
proc `==`*(left, right: TagId): bool {.borrow.}
proc `$`*(id: TagId): string
@ -361,6 +358,8 @@ proc extendedTagLibrary*(): YamlTagLibrary
## - ``!!value``
## - ``!!yaml``
proc guessType*(scalar: string): TypeHint {.raises: [].}
proc newParser*(tagLib: YamlTagLibrary): YamlSequentialParser
## Instanciates a parser
@ -412,4 +411,5 @@ include private.tagLibrary
include private.events
include private.parser
include private.json
include private.presenter
include private.presenter
include private.hints

View File

@ -241,9 +241,7 @@ proc construct*(s: YamlStream, result: var string) =
proc serialize*(value: string,
tagStyle: YamlTagStyle = ytsNone): YamlStream =
result = iterator(): YamlStreamEvent =
yield YamlStreamEvent(kind: yamlScalar,
scalarTag: presentTag(string, tagStyle),
scalarAnchor: yAnchorNone, scalarContent: value)
yield scalarEvent(value, presentTag(string, tagStyle), yAnchorNone)
proc yamlTag*(T: typedesc[int]): TagId {.inline.} = yTagInteger
@ -252,16 +250,15 @@ proc construct*(s: YamlStream, result: var int) =
if finished(s) or item.kind != yamlScalar:
raise newException(ValueError, "Construction error!")
if item.scalarTag != yTagInteger and not (
item.scalarTag == yTagQuestionMark and item.scalarType == yTypeInteger):
item.scalarTag == yTagQuestionMark and
guessType(item.scalarContent) == yTypeInteger):
raise newException(ValueError, "Wrong scalar type for int.")
result = parseInt(item.scalarContent)
proc serialize*(value: int,
tagStyle: YamlTagStyle = ytsNone): YamlStream =
result = iterator(): YamlStreamEvent =
yield YamlStreamEvent(kind: yamlScalar,
scalarTag: presentTag(int, tagStyle),
scalarAnchor: yAnchorNone, scalarContent: $value)
yield scalarEvent($value, presentTag(int, tagStyle), yAnchorNone)
proc yamlTag*(T: typedesc[int64]): TagId {.inline.} = yTagInteger
@ -270,16 +267,15 @@ proc contruct*(s: YamlStream, result: var int64) =
if finished(s) or item.kind != yamlScalar:
raise newException(ValueError, "Construction error!")
if item.scalarTag != yTagInteger and not (
item.scalarTag == yTagQuestionMark and item.scalarType == yTypeInteger):
item.scalarTag == yTagQuestionMark and
guessType(item.scalarContent) == yTypeInteger):
raise newException(ValueError, "Wrong scalar type for int64.")
result = parseBiggestInt(item.scalarContent)
proc serialize*(value: int64,
tagStyle: YamlTagStyle = ytsNone): YamlStream =
result = iterator(): YamlStreamEvent =
yield YamlStreamEvent(kind: yamlScalar,
scalarTag: presentTag(int64, tagStyle),
scalarAnchor: yAnchorNone, scalarContent: $value)
yield scalarEvent($value, presentTag(int64, tagStyle), yAnchorNone)
proc yamlTag*(T: typedesc[float]): TagId {.inline.} = yTagFloat
@ -287,10 +283,12 @@ proc construct*(s: YamlStream, result: var float) =
let item = s()
if finished(s) or item.kind != yamlScalar:
raise newException(ValueError, "Construction error!")
let hint = guessType(item.scalarContent)
if item.scalarTag != yTagFloat and not (
item.scalarTag == yTagQuestionMark and item.scalarType == yTypeFloat):
item.scalarTag == yTagQuestionMark and
hint in [yTypeFloat, yTypeFloatInf, yTypeFloatNaN]):
raise newException(ValueError, "Wrong scalar type for float.")
case item.scalarType
case hint
of yTypeFloat:
result = parseFloat(item.scalarContent)
of yTypeFloatInf:
@ -303,30 +301,20 @@ proc construct*(s: YamlStream, result: var float) =
else:
raise newException(ValueError, "Wrong scalar type for float.")
proc serialize*(value: float,
tagStyle: YamlTagStyle = ytsNone): YamlStream =
proc serialize*(value: float, tagStyle: YamlTagStyle = ytsNone): YamlStream =
result = iterator(): YamlStreamEvent =
var
asString: string
hint: YamlTypeHint
case value
of Inf:
asString = ".inf"
hint = yTypeFloatInf
of NegInf:
asString = "-.inf"
hint = yTypeFloatInf
of NaN:
asString = ".nan"
hint = yTypeFloatNaN
else:
asString = $value
hint = yTypeFloat
yield YamlStreamEvent(kind: yamlScalar,
scalarTag: presentTag(float, tagStyle),
scalarAnchor: yAnchorNone, scalarContent: asString,
scalarType: hint)
yield scalarEvent(asString, presentTag(float, tagStyle), yAnchorNone)
proc yamlTag*(T: typedesc[bool]): TagId {.inline.} = yTagBoolean
@ -334,34 +322,25 @@ proc construct*(s: YamlStream, result: var bool) =
let item = s()
if finished(s) or item.kind != yamlScalar:
raise newException(ValueError, "Construction error!")
let hint = guessType(item.scalarContent)
case item.scalarTag
of yTagQuestionMark:
case item.scalarType
of yTagQuestionMark, yTagBoolean:
case hint
of yTypeBoolTrue:
result = true
of yTypeBoolFalse:
result = false
else:
raise newException(ValueError, "Wrong scalar type for bool.")
of yTagBoolean:
if item.scalarContent.match(
re"y|Y|yes|Yes|YES|true|True|TRUE|on|On|ON"):
result = true
elif item.scalarContent.match(
re"n|N|no|No|NO|false|False|FALSE|off|Off|OFF"):
result = false
else:
raise newException(ValueError, "Wrong content for bool.")
raise newException(ValueError,
"Not a boolean: " & item.scalarContent)
else:
raise newException(ValueError, "Wrong scalar type for bool")
proc serialize*(value: bool,
tagStyle: YamlTagStyle = ytsNone): YamlStream =
result = iterator(): YamlStreamEvent =
yield YamlStreamEvent(kind: yamlScalar,
scalarTag: presentTag(bool, tagStyle),
scalarAnchor: yAnchorNone, scalarContent:
if value: "y" else: "n")
yield scalarEvent(if value: "y" else: "n", presentTag(bool, tagStyle),
yAnchorNone)
proc yamlTag*[I](T: typedesc[seq[I]]): TagId {.inline.} =
let uri = "!nim:seq(" & safeTagUri(yamlTag(I)) & ")"