Finished parser refactoring

* Re-implemented flow style handling
* Deleted old parser code
This commit is contained in:
Felix Krause 2016-08-17 20:04:59 +02:00
parent 01b34290b7
commit a9b82b0e45
3 changed files with 327 additions and 1573 deletions

View File

@ -11,7 +11,6 @@ task tests, "Run all tests":
task yamlTestSuite, "Run YAML 1.2 test suite": task yamlTestSuite, "Run YAML 1.2 test suite":
--r --r
--verbosity:0 --verbosity:0
--d:yamlDebug
setCommand "c", "test/yamlTestSuite" setCommand "c", "test/yamlTestSuite"
task serializationTests, "Run serialization tests": task serializationTests, "Run serialization tests":

File diff suppressed because it is too large Load Diff

View File

@ -24,6 +24,8 @@ type
chomp: ChompType chomp: ChompType
atSequenceItem: bool atSequenceItem: bool
recentWasMoreIndented: bool recentWasMoreIndented: bool
flowdepth: int
explicitFlowKey: bool
LevelEndResult = enum LevelEndResult = enum
lerNothing, lerOne, lerAdditionalMapEnd lerNothing, lerOne, lerAdditionalMapEnd
@ -625,7 +627,6 @@ proc blockScalarLine(s: ParserYamlStream, p: YamlParser):
p.content.add(p.lexer.buf[p.lexer.bufpos]) p.content.add(p.lexer.buf[p.lexer.bufpos])
p.lexer.bufpos.inc() p.lexer.bufpos.inc()
result = p.handleLineEnd(true) result = p.handleLineEnd(true)
debug(" (exiting with " & $p.newlines & " newlines)")
proc tagHandle(p: YamlParser, shorthandEnd: var int) proc tagHandle(p: YamlParser, shorthandEnd: var int)
{.raises: [YamlParserError].} = {.raises: [YamlParserError].} =
@ -829,6 +830,50 @@ proc handleBlockItemStart(p: YamlParser, e: var YamlStreamEvent): bool =
of fplScalar, fplSinglePairKey, fplSinglePairValue, fplDocument: of fplScalar, fplSinglePairKey, fplSinglePairValue, fplDocument:
internalError("Unexpected level kind: " & $p.level.kind) internalError("Unexpected level kind: " & $p.level.kind)
proc handleFlowItemStart(p: YamlParser, e: var YamlStreamEvent): bool =
  ## Called before a new flow item is started. Only when the current level
  ## is still `fplUnknown` and its direct ancestor is a sequence, the
  ## decision is delegated to `handlePossibleMapStart`; its result is
  ## returned unchanged. In every other situation `false` is returned and
  ## `e` is left untouched.
  ##
  ## NOTE(review): judging by its name, `handlePossibleMapStart` may emit a
  ## map start event through `e` (implicit single-pair map inside a flow
  ## sequence) -- confirm against its definition.
  if p.level.kind != fplUnknown:
    return false
  # The ancestry is only inspected once the level is known to be a
  # placeholder, exactly like the short-circuiting `and` it replaces.
  if p.ancestry[p.ancestry.high].kind != fplSequence:
    return false
  return p.handlePossibleMapStart(e, true, true)
proc handleFlowPlainScalar(p: YamlParser, e: var YamlStreamEvent) =
  ## Reads a (possibly multi-line) plain scalar in flow context, starting at
  ## the current lexer position, and stores the resulting scalar event in
  ## `e`.
  ##
  ## The first line is read with `plainScalar(cFlow)`. Unless it is directly
  ## followed by a flow indicator, ':' or '#', continuation lines are
  ## collected: line breaks are counted in `p.newlines` and folded into the
  ## content right before the next piece of scalar text is appended.
  template foldPendingLineBreaks() =
    # YAML line folding: a single line break becomes one space, while n > 1
    # consecutive line breaks are kept as n - 1 line feeds. (Previously the
    # latter case emitted spaces, silently dropping empty lines.)
    if p.newlines == 1:
      p.content.add(' ')
      p.newlines = 0
    elif p.newlines > 1:
      p.content.addMultiple('\l', p.newlines - 1)
      p.newlines = 0

  p.content.reset()
  p.startToken()
  p.plainScalar(cFlow)
  if p.lexer.buf[p.lexer.bufpos] notin {'{', '}', '[', ']', ',', ':', '#'}:
    p.newlines = 0
    while true:
      case p.lexer.buf[p.lexer.bufpos]
      of ':':
        # A ':' only continues the scalar if the following character is
        # plain-safe; in either case scanning stops after this branch.
        if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cFlow):
          foldPendingLineBreaks()
          p.plainScalar(cFlow)
        break
      of '#', EndOfFile: break
      of '\l':
        p.lexer.bufpos = p.lexer.handleLF(p.lexer.bufpos)
        p.newlines.inc()
      of '\c':
        p.lexer.bufpos = p.lexer.handleCR(p.lexer.bufpos)
        p.newlines.inc()
      of flowIndicators: break
      of ' ', '\t': p.lexer.skipWhitespace()
      else:
        # More scalar text on a continuation line.
        foldPendingLineBreaks()
        p.plainScalar(cFlow)
  e = p.currentScalar()
# --- macros for defining parser states --- # --- macros for defining parser states ---
macro parserStates(names: varargs[untyped]): stmt = macro parserStates(names: varargs[untyped]): stmt =
@ -904,7 +949,8 @@ macro parserState(name: untyped, impl: untyped): stmt =
parserStates(initial, blockObjectStart, blockAfterPlainScalar, blockAfterObject, parserStates(initial, blockObjectStart, blockAfterPlainScalar, blockAfterObject,
scalarEnd, objectEnd, expectDocEnd, startDoc, afterDocument, scalarEnd, objectEnd, expectDocEnd, startDoc, afterDocument,
closeStream, closeMoreIndentedLevels, emitEmptyScalar, tagHandle, closeStream, closeMoreIndentedLevels, emitEmptyScalar, tagHandle,
anchor, alias, flow) anchor, alias, flow, leaveFlowMap, leaveFlowSeq, flowAfterObject,
leaveFlowSinglePairMap)
proc closeEverything(s: ParserYamlStream, p: YamlParser) = proc closeEverything(s: ParserYamlStream, p: YamlParser) =
p.indentation = -1 p.indentation = -1
@ -917,21 +963,17 @@ proc endLevel(p: YamlParser, s: YamlStream, e: var YamlStreamEvent):
case p.level.kind case p.level.kind
of fplSequence: of fplSequence:
e = endSeqEvent() e = endSeqEvent()
debug("endLevel: endSeq")
of fplMapKey: of fplMapKey:
e = endMapEvent() e = endMapEvent()
debug("endLevel: endMap")
of fplMapValue, fplSinglePairValue: of fplMapValue, fplSinglePairValue:
e = emptyScalar(p) e = emptyScalar(p)
p.level.kind = fplMapKey p.level.kind = fplMapKey
result = lerAdditionalMapEnd result = lerAdditionalMapEnd
debug("endLevel: emptyScalar")
of fplScalar: of fplScalar:
if ParserYamlStream(s).scalarType != stFlow: if ParserYamlStream(s).scalarType != stFlow:
case ParserYamlStream(s).chomp case ParserYamlStream(s).chomp
of ctKeep: of ctKeep:
if p.content.len == 0: p.newlines.inc(-1) if p.content.len == 0: p.newlines.inc(-1)
debug("adding kept newlines: " & $p.newlines)
p.content.addMultiple('\l', p.newlines) p.content.addMultiple('\l', p.newlines)
of ctClip: of ctClip:
if p.content.len != 0: if p.content.len != 0:
@ -939,7 +981,6 @@ proc endLevel(p: YamlParser, s: YamlStream, e: var YamlStreamEvent):
p.content.add('\l') p.content.add('\l')
of ctStrip: discard of ctStrip: discard
e = currentScalar(p) e = currentScalar(p)
debug("endLevel: scalar(\"" & e.scalarContent & "\")")
p.tag = yTagQuestionMark p.tag = yTagQuestionMark
p.anchor = yAnchorNone p.anchor = yAnchorNone
of fplUnknown: of fplUnknown:
@ -1005,6 +1046,16 @@ template handleObjectEnd(p: YamlParser, mayHaveEmptyValue: bool = false): bool =
internalError("Unexpected level kind: " & $p.level.kind) internalError("Unexpected level kind: " & $p.level.kind)
result result
proc leaveFlowLevel(s: ParserYamlStream, p: YamlParser,
                    e: var YamlStreamEvent): bool =
  ## Closes the current flow collection level.
  ##
  ## Decrements the flow nesting depth of `s`, ends the current level via
  ## `endLevel` (which may write an event to `e`) and returns `true` iff
  ## `endLevel` reported `lerOne`. Afterwards `stateObjectEnd` is scheduled
  ## as next state; the stored state is `stateBlockAfterObject` when the
  ## outermost flow collection has just been left, `stateFlowAfterObject`
  ## otherwise.
  dec(s.flowdepth)
  let ended = p.endLevel(s, e)
  result = ended == lerOne # lerAdditionalMapEnd cannot happen
  s.storedState =
    if s.flowdepth == 0: stateBlockAfterObject
    else: stateFlowAfterObject
  s.nextImpl = stateObjectEnd
parserState initial: parserState initial:
case p.lexer.buf[p.lexer.bufpos] case p.lexer.buf[p.lexer.bufpos]
of '%': of '%':
@ -1036,7 +1087,6 @@ parserState initial:
of ' ', '\t': of ' ', '\t':
if not p.consumeLineIfEmpty(p.newlines): if not p.consumeLineIfEmpty(p.newlines):
p.indentation = p.lexer.getColNumber(p.lexer.bufpos) p.indentation = p.lexer.getColNumber(p.lexer.bufpos)
echo "startDocEvent B"
e = startDocEvent() e = startDocEvent()
result = true result = true
state = blockObjectStart state = blockObjectStart
@ -1051,12 +1101,10 @@ parserState initial:
p.startToken() p.startToken()
p.lexer.directivesEndMarker(success) p.lexer.directivesEndMarker(success)
if success: p.lexer.bufpos.inc(3) if success: p.lexer.bufpos.inc(3)
echo "startDocEvent C"
e = startDocEvent() e = startDocEvent()
result = true result = true
state = blockObjectStart state = blockObjectStart
else: else:
echo "startDocEvent D"
e = startDocEvent() e = startDocEvent()
result = true result = true
state = blockObjectStart state = blockObjectStart
@ -1374,7 +1422,6 @@ parserState expectDocEnd:
parserState startDoc: parserState startDoc:
p.initDocValues() p.initDocValues()
echo "startDocEvent E"
e = startDocEvent() e = startDocEvent()
result = true result = true
state = blockObjectStart state = blockObjectStart
@ -1391,7 +1438,6 @@ parserState afterDocument:
discard p.handleLineEnd(true) discard p.handleLineEnd(true)
else: else:
p.initDocValues() p.initDocValues()
echo "startDocEvent F"
e = startDocEvent() e = startDocEvent()
result = true result = true
state = blockObjectStart state = blockObjectStart
@ -1402,7 +1448,6 @@ parserState afterDocument:
if not p.consumeLineIfEmpty(p.newlines): if not p.consumeLineIfEmpty(p.newlines):
p.indentation = p.lexer.getColNumber(p.lexer.bufpos) p.indentation = p.lexer.getColNumber(p.lexer.bufpos)
p.initDocValues() p.initDocValues()
echo "startDocEvent A"
e = startDocEvent() e = startDocEvent()
result = true result = true
state = blockObjectStart state = blockObjectStart
@ -1445,17 +1490,14 @@ parserState closeMoreIndentedLevels:
of lerAdditionalMapEnd: return true of lerAdditionalMapEnd: return true
discard p.handleObjectEnd(false) discard p.handleObjectEnd(false)
return result return result
debug("c")
if p.level.kind == fplDocument: state = expectDocEnd if p.level.kind == fplDocument: state = expectDocEnd
else: state = stored else: state = stored
elif p.indentation == p.level.indentation: elif p.indentation == p.level.indentation:
debug("a")
let res = p.endLevel(s, e) let res = p.endLevel(s, e)
yAssert(res == lerOne) yAssert(res == lerOne)
result = true result = true
state = stored state = stored
else: else:
debug("b")
state = stored state = stored
parserState emitEmptyScalar: parserState emitEmptyScalar:
@ -1486,7 +1528,274 @@ parserState alias:
state = objectEnd state = objectEnd
# Parser state `flow`: expects the start of a flow item (or a flow indicator
# that implies an empty item). While in this state, the current level is a
# placeholder of kind `fplUnknown` whose parent (top of `p.ancestry`)
# describes the enclosing collection; several branches assert this.
parserState flow:
  p.lexer.skipWhitespaceCommentsAndNewlines()
  case p.lexer.buf[p.lexer.bufpos]
  of '{':
    if p.handleFlowItemStart(e): return true
    e = p.objectStart(yamlStartMap)
    result = true
    ParserYamlStream(s).flowdepth.inc()
    p.lexer.bufpos.inc()
    ParserYamlStream(s).explicitFlowKey = false
  of '[':
    if p.handleFlowItemStart(e): return true
    e = p.objectStart(yamlStartSeq)
    result = true
    ParserYamlStream(s).flowdepth.inc()
    p.lexer.bufpos.inc()
  of '}':
    # Drop the placeholder level; the enclosing level is closed by the
    # leaveFlowMap state.
    yAssert(p.level.kind == fplUnknown)
    p.level = p.ancestry.pop()
    p.lexer.bufpos.inc()
    state = leaveFlowMap
  of ']':
    yAssert(p.level.kind == fplUnknown)
    p.level = p.ancestry.pop()
    p.lexer.bufpos.inc()
    state = leaveFlowSeq
  of ',':
    # A ',' where an item was expected: the missing item is an empty scalar.
    yAssert(p.level.kind == fplUnknown)
    p.level = p.ancestry.pop()
    case p.level.kind
    of fplSequence:
      e = emptyScalar(p)
      result = true
    of fplMapValue:
      e = emptyScalar(p)
      result = true
      p.level.kind = fplMapKey
      ParserYamlStream(s).explicitFlowKey = false
    of fplMapKey:
      # Empty key: emit it now and leave the ',' unconsumed so that the next
      # pass through this branch emits the matching empty value. The
      # placeholder level must be pushed again before returning, because
      # that next pass starts with `yAssert(p.level.kind == fplUnknown)`
      # followed by a pop; returning without the push made the assertion
      # fail (e.g. for `{ ? , a: b }`).
      e = emptyScalar(p)
      p.level.kind = fplMapValue
      p.ancestry.add(p.level)
      p.level = initLevel(fplUnknown)
      return true
    of fplSinglePairValue:
      e = emptyScalar(p)
      result = true
      p.level = p.ancestry.pop()
      state = leaveFlowSinglePairMap
      stored = flow
    of fplUnknown, fplScalar, fplSinglePairKey, fplDocument:
      internalError("Unexpected level kind: " & $p.level.kind)
    # Re-establish the placeholder level for the next item and consume ','.
    p.ancestry.add(p.level)
    p.level = initLevel(fplUnknown)
    p.lexer.bufpos.inc()
  of ':':
    if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cFlow):
      # ':' followed by a plain-safe character starts a plain scalar.
      if p.handleFlowItemStart(e): return true
      p.handleFlowPlainScalar(e)
      result = true
      state = objectEnd
      stored = flowAfterObject
    else:
      # ':' as mapping value indicator without a preceding key.
      p.level = p.ancestry.pop()
      case p.level.kind
      of fplSequence:
        # Implicit single-pair map inside a flow sequence. The ':' is not
        # consumed here (see the check below the case statement); the next
        # pass handles it in the fplSinglePairKey branch.
        e = startMapEvent(p.tag, p.anchor)
        result = true
        debug("started single-pair map at " &
            (if p.level.indentation == UnknownIndentation:
             $p.indentation else: $p.level.indentation))
        p.tag = yTagQuestionMark
        p.anchor = yAnchorNone
        if p.level.indentation == UnknownIndentation:
          p.level.indentation = p.indentation
        p.ancestry.add(p.level)
        p.level = initLevel(fplSinglePairKey)
      of fplMapValue, fplSinglePairValue:
        p.startToken()
        raise p.generateError("Unexpected token (expected ',')")
      of fplMapKey:
        e = emptyScalar(p)
        result = true
        p.level.kind = fplMapValue
      of fplSinglePairKey:
        e = emptyScalar(p)
        result = true
        p.level.kind = fplSinglePairValue
      of fplUnknown, fplScalar, fplDocument:
        internalError("Unexpected level kind: " & $p.level.kind)
      if p.level.kind != fplSinglePairKey: p.lexer.bufpos.inc()
      p.ancestry.add(p.level)
      p.level = initLevel(fplUnknown)
  of '\'':
    if p.handleFlowItemStart(e): return true
    p.content.reset()
    p.startToken()
    p.singleQuotedScalar()
    # Quoted scalars without an explicit tag get the non-specific tag "!".
    if p.tag == yTagQuestionMark: p.tag = yTagExclamationMark
    e = currentScalar(p)
    result = true
    state = objectEnd
    stored = flowAfterObject
  of '"':
    if p.handleFlowItemStart(e): return true
    p.content.reset()
    p.startToken()
    p.doubleQuotedScalar()
    if p.tag == yTagQuestionMark: p.tag = yTagExclamationMark
    e = currentScalar(p)
    result = true
    state = objectEnd
    stored = flowAfterObject
  of '!':
    if p.handleFlowItemStart(e): return true
    p.handleTagHandle()
  of '&':
    if p.handleFlowItemStart(e): return true
    p.handleAnchor()
  of '*':
    state = alias
    stored = flowAfterObject
  of '?':
    if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cFlow):
      # '?' followed by a plain-safe character starts a plain scalar.
      if p.handleFlowItemStart(e): return true
      p.handleFlowPlainScalar(e)
      result = true
      state = objectEnd
      stored = flowAfterObject
    elif ParserYamlStream(s).explicitFlowKey:
      p.startToken()
      raise p.generateError("Duplicate '?' in flow mapping")
    elif p.level.kind == fplUnknown:
      case p.ancestry[p.ancestry.high].kind
      of fplMapKey, fplMapValue, fplDocument: discard
      of fplSequence:
        # Explicit key inside a flow sequence starts a single-pair map.
        e = p.objectStart(yamlStartMap, true)
        result = true
      else:
        p.startToken()
        raise p.generateError("Unexpected token")
      ParserYamlStream(s).explicitFlowKey = true
      p.lexer.bufpos.inc()
    else:
      ParserYamlStream(s).explicitFlowKey = true
      p.lexer.bufpos.inc()
  else:
    if p.handleFlowItemStart(e): return true
    p.handleFlowPlainScalar(e)
    result = true
    state = objectEnd
    stored = flowAfterObject
# Parser state `leaveFlowMap`: entered from the `flow` state after a '}' has
# been consumed and the placeholder level has been popped. Emits the empty
# scalars that are still owed before the mapping level itself is closed.
parserState leaveFlowMap:
  case p.level.kind
  of fplMapValue:
    # A key without value precedes '}': emit the empty value first. The
    # state is left unchanged, so the level is re-examined afterwards.
    e = emptyScalar(p)
    p.level.kind = fplMapKey
    return true
  of fplMapKey:
    # Pending node properties or an explicit '?' imply an empty key, which
    # in turn needs an empty value (handled by the branch above).
    if p.tag != yTagQuestionMark or p.anchor != yAnchorNone or
        ParserYamlStream(s).explicitFlowKey:
      e = emptyScalar(p)
      p.level.kind = fplMapValue
      ParserYamlStream(s).explicitFlowKey = false
      return true
  of fplSequence, fplSinglePairValue:
    # '}' cannot close a flow sequence or a single-pair map within one.
    p.startToken()
    raise p.generateError("Unexpected token (expected ']')")
  of fplUnknown, fplScalar, fplSinglePairKey, fplDocument:
    internalError("Unexpected level kind: " & $p.level.kind)
  result = leaveFlowLevel(ParserYamlStream(s), p, e)
# Parser state `leaveFlowSeq`: entered from the `flow` state after a ']' has
# been consumed and the placeholder level has been popped. Emits pending
# empty items before the sequence level itself is closed.
parserState leaveFlowSeq:
  case p.level.kind
  of fplSequence:
    # Only node properties without content require an empty scalar here.
    if not (p.tag == yTagQuestionMark and p.anchor == yAnchorNone):
      e = emptyScalar(p)
      return true
  of fplSinglePairValue:
    # Single-pair map without value: emit the empty value, then close the
    # map via leaveFlowSinglePairMap and come back to this state.
    e = emptyScalar(p)
    p.level = p.ancestry.pop()
    state = leaveFlowSinglePairMap
    stored = leaveFlowSeq
    return true
  of fplMapKey, fplMapValue:
    p.startToken()
    raise p.generateError("Unexpected token (expected '}')")
  of fplUnknown, fplScalar, fplSinglePairKey, fplDocument:
    internalError("Unexpected level kind: " & $p.level.kind)
  result = leaveFlowLevel(ParserYamlStream(s), p, e)
# Parser state `leaveFlowSinglePairMap`: emits the end event of a
# single-pair map and continues with whatever state has been stored by the
# state that scheduled this one.
parserState leaveFlowSinglePairMap:
  result = true
  e = endMapEvent()
  state = stored
# Parser state `flowAfterObject`: runs after a complete node inside a flow
# collection. Only a closing bracket, ',', ':' or a comment may follow;
# anything else is reported as an error. Here `p.level` is the level of the
# enclosing collection (not an `fplUnknown` placeholder).
parserState flowAfterObject:
  p.lexer.skipWhitespaceCommentsAndNewlines()
  case p.lexer.buf[p.lexer.bufpos]
  of ']':
    case p.level.kind
    of fplSequence: discard
    of fplMapKey, fplMapValue:
      p.startToken()
      raise p.generateError("Unexpected token (expected '}')")
    of fplSinglePairValue:
      # Close the single-pair map first. ']' is not consumed, so the next
      # pass sees it again with the sequence as current level.
      p.level = p.ancestry.pop()
      yAssert(p.level.kind == fplSequence)
      e = endMapEvent()
      return true
    of fplScalar, fplUnknown, fplSinglePairKey, fplDocument:
      internalError("Unexpected level kind: " & $p.level.kind)
    p.lexer.bufpos.inc()
    result = leaveFlowLevel(ParserYamlStream(s), p, e)
  of '}':
    case p.level.kind
    of fplMapKey, fplMapValue: discard
    of fplSequence, fplSinglePairValue:
      p.startToken()
      raise p.generateError("Unexpected token (expected ']')")
    of fplUnknown, fplScalar, fplSinglePairKey, fplDocument:
      internalError("Unexpected level kind: " & $p.level.kind)
    p.lexer.bufpos.inc()
    result = leaveFlowLevel(ParserYamlStream(s), p, e)
  of ',':
    case p.level.kind
    of fplSequence: discard
    of fplMapValue:
      # Key without value: emit an untagged, unanchored empty scalar as
      # value and expect a key next.
      e = scalarEvent("", yTagQuestionMark, yAnchorNone)
      result = true
      p.level.kind = fplMapKey
      ParserYamlStream(s).explicitFlowKey = false
    of fplSinglePairValue:
      # ',' ends the single-pair map; continue in the enclosing sequence.
      p.level = p.ancestry.pop()
      yAssert(p.level.kind == fplSequence)
      e = endMapEvent()
      result = true
    of fplMapKey: ParserYamlStream(s).explicitFlowKey = false
    of fplUnknown, fplScalar, fplSinglePairKey, fplDocument:
      internalError("Unexpected level kind: " & $p.level.kind)
    # Push a placeholder level for the next item, hand over to the flow
    # state and consume ','.
    p.ancestry.add(p.level)
    p.level = initLevel(fplUnknown)
    state = flow
    p.lexer.bufpos.inc()
  of ':':
    case p.level.kind
    of fplSequence, fplMapKey:
      p.startToken()
      raise p.generateError("Unexpected token (expected ',')")
    of fplMapValue, fplSinglePairValue: discard
    of fplUnknown, fplScalar, fplSinglePairKey, fplDocument:
      internalError("Unexpected level kind: " & $p.level.kind)
    # The value follows: push a placeholder level for it and consume ':'.
    p.ancestry.add(p.level)
    p.level = initLevel(fplUnknown)
    state = flow
    p.lexer.bufpos.inc()
  of '#':
    p.lineEnding()
    # NOTE(review): handleLineEnd(true) presumably returns true when the
    # input or document ends here -- confirm against its definition.
    if p.handleLineEnd(true):
      p.startToken()
      raise p.generateError("Unclosed flow content")
  of EndOfFile:
    p.startToken()
    raise p.generateError("Unclosed flow content")
  else:
    p.startToken()
    raise p.generateError("Unexpected content (expected flow indicator)")
# --- parser initialization --- # --- parser initialization ---
@ -1508,6 +1817,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
p.initDocValues() p.initDocValues()
ParserYamlStream(result).p = p ParserYamlStream(result).p = p
ParserYamlStream(result).atSequenceItem = false ParserYamlStream(result).atSequenceItem = false
ParserYamlStream(result).flowdepth = 0
result.isFinished = false result.isFinished = false
result.peeked = false result.peeked = false
result.nextImpl = stateInitial result.nextImpl = stateInitial