Got rid of fpBlockLineStart, fixes #9

This commit is contained in:
Felix Krause 2016-03-20 13:50:00 +01:00
parent b6d363107a
commit a1f914eee4
1 changed file with 116 additions and 201 deletions

View File

@ -6,7 +6,7 @@
type type
FastParseState = enum FastParseState = enum
fpInitial, fpBlockLineStart, fpBlockAfterObject, fpBlockAfterPlainScalar, fpInitial, fpBlockAfterObject, fpBlockAfterPlainScalar,
fpBlockObjectStart, fpBlockContinueScalar, fpExpectDocEnd, fpFlow, fpBlockObjectStart, fpBlockContinueScalar, fpExpectDocEnd, fpFlow,
fpFlowAfterObject, fpAfterDocument fpFlowAfterObject, fpAfterDocument
@ -518,31 +518,23 @@ template tagUriMapping(lexer: BaseLexer, uri: var string) =
c = lexer.buf[lexer.bufpos] c = lexer.buf[lexer.bufpos]
else: lexerError(lexer, "Invalid tag uri") else: lexerError(lexer, "Invalid tag uri")
template directivesEndMarker(lexer: BaseLexer, template directivesEndMarker(lexer: var BaseLexer, success: var bool) =
token: var LexedPossibleDirectivesEnd) = debug("lex: directivesEndMarker")
debug("lex: directivesEnd") success = true
var p = lexer.bufpos + 1 for i in 0..2:
case lexer.buf[p] if lexer.buf[lexer.bufpos + i] != '-':
of '-': success = false
p.inc() break
if lexer.buf[p] == '-': if success: success = lexer.buf[lexer.bufpos + 3] in spaceOrLineEnd
p.inc()
if lexer.buf[p] in spaceOrLineEnd: token = lpdeDirectivesEnd
else: token = lpdeScalarContent
else: token = lpdeScalarContent
of spaceOrLineEnd: token = lpdeSequenceItem
else: token = lpdeScalarContent
template documentEndMarker(lexer: var BaseLexer, isDocumentEnd: var bool) = template documentEndMarker(lexer: var BaseLexer, success: var bool) =
var p = lexer.bufpos + 1 debug("lex: documentEndMarker")
if lexer.buf[p] == '.': success = true
p.inc() for i in 0..2:
if lexer.buf[p] == '.': if lexer.buf[lexer.bufpos + i] != '.':
p.inc() success = false
if lexer.buf[p] in spaceOrLineEnd: isDocumentEnd = true break
else: isDocumentEnd = false if success: success = lexer.buf[lexer.bufpos + 3] in spaceOrLineEnd
else: isDocumentEnd = false
else: isDocumentEnd = false
proc unicodeSequence(lexer: var BaseLexer, length: int): proc unicodeSequence(lexer: var BaseLexer, length: int):
string {.raises: [YamlParserError].} = string {.raises: [YamlParserError].} =
@ -739,7 +731,10 @@ template plainScalar(lexer: BaseLexer, content: var string,
content.add(after) content.add(after)
content.add(c2) content.add(c2)
break break
of lineEnd, flowIndicators: break of flowIndicators:
when context == cFlow: break
else: content.add(c)
of lineEnd: break
of ':': of ':':
if lexer.isPlainSafe(lexer.bufpos + 1, context): content.add(':') if lexer.isPlainSafe(lexer.bufpos + 1, context): content.add(':')
else: break outer else: break outer
@ -864,6 +859,8 @@ template blockScalar(lexer: BaseLexer, content: var string,
detectedIndent = false detectedIndent = false
recentLineMoreIndented = false recentLineMoreIndented = false
let parentIndent = ancestry[ancestry.high].indentation let parentIndent = ancestry[ancestry.high].indentation
assert(parentIndent != Unknownindentation,
"parent " & $ancestry[ancestry.high].kind & " has unknown indentation")
case lexer.buf[lexer.bufpos] case lexer.buf[lexer.bufpos]
of '|': literal = true of '|': literal = true
@ -914,7 +911,7 @@ template blockScalar(lexer: BaseLexer, content: var string,
newlines.inc() newlines.inc()
break inner break inner
else: else:
stateAfter = if i == 1: fpBlockLineStart else: fpBlockObjectStart stateAfter = fpBlockObjectStart
break outer break outer
lexer.bufpos.inc() lexer.bufpos.inc()
if parentIndent == -1 and lexer.buf[lexer.bufpos] == '.': if parentIndent == -1 and lexer.buf[lexer.bufpos] == '.':
@ -937,7 +934,7 @@ template blockScalar(lexer: BaseLexer, content: var string,
newlines.inc() newlines.inc()
break inner break inner
of EndOfFile: of EndOfFile:
stateAfter = fpBlockLineStart stateAfter = fpBlockObjectStart
break outer break outer
of '#': of '#':
lexer.lineEnding() lexer.lineEnding()
@ -945,7 +942,7 @@ template blockScalar(lexer: BaseLexer, content: var string,
of '\l': lexer.bufpos = lexer.handleLF(lexer.bufpos) of '\l': lexer.bufpos = lexer.handleLF(lexer.bufpos)
of '\c': lexer.bufpos = lexer.handleCR(lexer.bufpos) of '\c': lexer.bufpos = lexer.handleCR(lexer.bufpos)
else: discard else: discard
stateAfter = fpBlockLineStart stateAfter = fpBlockObjectStart
break outer break outer
else: else:
startToken() startToken()
@ -964,14 +961,13 @@ template blockScalar(lexer: BaseLexer, content: var string,
newlines.inc() newlines.inc()
break inner break inner
of EndOfFile: of EndOfFile:
stateAfter = fpBlockLineStart stateAfter = fpBlockObjectStart
break outer break outer
else: else:
blockIndent = blockIndent =
lexer.getColNumber(lexer.bufpos) - max(0, parentIndent) lexer.getColNumber(lexer.bufpos) - max(0, parentIndent)
if blockIndent == 0 and parentIndent >= 0: if blockIndent == 0 and parentIndent >= 0:
stateAfter = if blockIndent + parentIndent > 0: stateAfter = fpBlockObjectStart
fpBlockObjectStart else: fpBlockLineStart
break outer break outer
detectedIndent = true detectedIndent = true
break break
@ -986,7 +982,7 @@ template blockScalar(lexer: BaseLexer, content: var string,
newlines.inc() newlines.inc()
break inner break inner
of EndOfFile: of EndOfFile:
stateAfter = fpBlockLineStart stateAfter = fpBlockObjectStart
break outer break outer
of ' ', '\t': of ' ', '\t':
if not literal: if not literal:
@ -1019,7 +1015,7 @@ template blockScalar(lexer: BaseLexer, content: var string,
newlines.inc() newlines.inc()
break inner break inner
of EndOfFile: of EndOfFile:
stateAfter = fpBlockLineStart stateAfter = fpBlockObjectStart
break outer break outer
else: content.add(c) else: content.add(c)
lexer.bufpos.inc() lexer.bufpos.inc()
@ -1124,136 +1120,15 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
p.lexer.lineEnding() p.lexer.lineEnding()
handleLineEnd(false) handleLineEnd(false)
of '-': of '-':
var token: LexedPossibleDirectivesEnd var success: bool
startToken() startToken()
p.lexer.directivesEndMarker(token) p.lexer.directivesEndMarker(success)
yield startDocEvent() yield startDocEvent()
case token if success:
of lpdeDirectivesEnd:
p.lexer.bufpos.inc(3) p.lexer.bufpos.inc(3)
state = fpBlockObjectStart state = fpBlockObjectStart
of lpdeSequenceItem:
indentation = 0
p.lexer.bufpos.inc()
handleBlockSequenceIndicator()
state = fpBlockObjectStart
of lpdeScalarContent:
content.setLen(0)
p.lexer.plainScalar(content, cBlock)
state = fpBlockAfterPlainScalar
else: else:
yield startDocEvent() yield startDocEvent()
state = fpBlockLineStart
of fpBlockLineStart:
debug("state: blockLineStart")
case p.lexer.buf[p.lexer.bufpos]
of '-':
var token: LexedPossibleDirectivesEnd
startToken()
p.lexer.directivesEndMarker(token)
case token
of lpdeDirectivesEnd:
p.lexer.bufpos.inc(3)
closeEverything()
initDocValues()
yield startDocEvent()
state = fpBlockObjectStart
of lpdeSequenceItem:
indentation = 0
closeMoreIndentedLevels(true)
p.lexer.bufpos.inc()
handleBlockSequenceIndicator()
state = fpBlockObjectStart
of lpdeScalarContent:
case level.kind
of fplScalar: continueMultilineScalar()
of fplUnknown: handlePossibleMapStart()
else:
ensureCorrectIndentation()
ancestry.add(level)
level = initLevel(fplUnknown)
content.setLen(0)
p.lexer.plainScalar(content, cBlock)
state = fpBlockAfterPlainScalar
of '.':
var isDocumentEnd: bool
startToken()
p.lexer.documentEndMarker(isDocumentEnd)
if isDocumentEnd:
closeEverything()
p.lexer.bufpos.inc(3)
p.lexer.lineEnding()
handleLineEnd(false)
state = fpAfterDocument
else:
indentation = 0
closeMoreIndentedLevels()
case level.kind
of fplUnknown: handlePossibleMapStart()
of fplScalar: continueMultilineScalar()
else:
ensureCorrectIndentation()
ancestry.add(level)
level = initLevel(fplUnknown)
content.setLen(0)
p.lexer.plainScalar(content, cBlock)
state = fpBlockAfterPlainScalar
of ' ':
p.lexer.skipIndentation()
let c = p.lexer.buf[p.lexer.bufpos]
if c in {'\l', '\c', '#', EndOfFile}:
p.lexer.lineEnding()
handleLineEnd(true)
elif c == '\t':
indentation = p.lexer.getColNumber(p.lexer.bufpos)
p.lexer.bufpos.inc()
while p.lexer.buf[p.lexer.bufpos] in {'\t', ' '}:
p.lexer.bufpos.inc()
if p.lexer.buf[p.lexer.bufpos] in {'\l', '\c', '#', EndOfFile}:
p.lexer.lineEnding()
handleLineEnd(true)
else:
closeMoreIndentedLevels(true)
if level.kind == fplScalar: state = fpBlockContinueScalar
else: lexerError(p.lexer, "tabular not allowed here")
else:
indentation = p.lexer.getColNumber(p.lexer.bufpos)
if c == '-' and not
p.lexer.isPlainSafe(p.lexer.bufpos + 1, if flowdepth == 0:
cBlock else: cFlow):
closeMoreIndentedLevels(true)
else: closeMoreIndentedLevels()
case level.kind
of fplScalar: state = fpBlockContinueScalar
of fplUnknown: state = fpBlockObjectStart
else:
ensureCorrectIndentation()
state = fpBlockObjectStart
of EndOfFile:
closeEverything()
break
of '\t':
indentation = 0
p.lexer.bufpos.inc()
while p.lexer.buf[p.lexer.bufpos] in {'\t', ' '}: p.lexer.bufpos.inc()
if p.lexer.buf[p.lexer.bufpos] in {'\l', '\c', '#', EndOfFile}:
p.lexer.lineEnding()
handleLineEnd(true)
else:
closeMoreIndentedLevels(true)
if level.kind == fplScalar: state = fpBlockContinueScalar
else: lexerError(p.lexer, "tabular not allowed here")
of '\l', '\c', '#':
p.lexer.lineEnding()
handleLineEnd(true)
else:
indentation = 0
closeMoreIndentedLevels()
case level.kind
of fplScalar: state = fpBlockContinueScalar
of fplUnknown: state = fpBlockObjectStart
else:
ensureCorrectIndentation()
state = fpBlockObjectStart state = fpBlockObjectStart
of fpBlockContinueScalar: of fpBlockContinueScalar:
debug("state: fpBlockContinueScalar") debug("state: fpBlockContinueScalar")
@ -1262,7 +1137,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
of '\l': of '\l':
newlines.inc() newlines.inc()
p.lexer.bufpos = p.lexer.handleLF(p.lexer.bufpos) p.lexer.bufpos = p.lexer.handleLF(p.lexer.bufpos)
state = fpBlockLineStart state = fpBlockObjectStart
of '\c': of '\c':
newlines.inc() newlines.inc()
p.lexer.bufpos = p.lexer.handleCR(p.lexer.bufpos) p.lexer.bufpos = p.lexer.handleCR(p.lexer.bufpos)
@ -1276,7 +1151,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
yieldShallowScalar(content) yieldShallowScalar(content)
p.lexer.lineEnding() p.lexer.lineEnding()
handleLineEnd(true) handleLineEnd(true)
handleObjectEnd(fpBlockLineStart) handleObjectEnd(fpBlockObjectStart)
else: else:
continueMultilineScalar() continueMultilineScalar()
of fpBlockAfterPlainScalar: of fpBlockAfterPlainScalar:
@ -1290,7 +1165,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
newlines = 1 newlines = 1
level.kind = fplScalar level.kind = fplScalar
p.lexer.bufpos = p.lexer.handleLF(p.lexer.bufpos) p.lexer.bufpos = p.lexer.handleLF(p.lexer.bufpos)
state = fpBlockLineStart state = fpBlockObjectStart
of '\c': of '\c':
if level.kind notin {fplUnknown, fplScalar}: if level.kind notin {fplUnknown, fplScalar}:
startToken() startToken()
@ -1298,7 +1173,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
newlines = 1 newlines = 1
level.kind = fplScalar level.kind = fplScalar
p.lexer.bufpos = p.lexer.handleCR(p.lexer.bufpos) p.lexer.bufpos = p.lexer.handleCR(p.lexer.bufpos)
state = fpBlockLineStart state = fpBlockObjectStart
else: else:
yieldShallowScalar(content) yieldShallowScalar(content)
handleObjectEnd(fpBlockAfterObject) handleObjectEnd(fpBlockAfterObject)
@ -1310,10 +1185,10 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
closeEverything() closeEverything()
break break
of '\l': of '\l':
state = fpBlockLineStart state = fpBlockObjectStart
p.lexer.bufpos = p.lexer.handleLF(p.lexer.bufpos) p.lexer.bufpos = p.lexer.handleLF(p.lexer.bufpos)
of '\c': of '\c':
state = fpBlockLineStart state = fpBlockObjectStart
p.lexer.bufpos = p.lexer.handleCR(p.lexer.bufpos) p.lexer.bufpos = p.lexer.handleCR(p.lexer.bufpos)
of ':': of ':':
case level.kind case level.kind
@ -1342,32 +1217,49 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
of '#': of '#':
p.lexer.lineEnding() p.lexer.lineEnding()
handleLineEnd(true) handleLineEnd(true)
state = fpBlockLineStart state = fpBlockObjectStart
else: else:
startToken() startToken()
parserError("Illegal token (expected ':', comment or line end)") parserError("Illegal token (expected ':', comment or line end)")
of fpBlockObjectStart: of fpBlockObjectStart:
debug("state: blockObjectStart") debug("state: blockObjectStart")
p.lexer.skipWhitespace() p.lexer.skipIndentation()
indentation = p.lexer.getColNumber(p.lexer.bufpos) indentation = p.lexer.getColNumber(p.lexer.bufpos)
if indentation == 0:
var success: bool
p.lexer.directivesEndMarker(success)
if success:
p.lexer.bufpos.inc(3)
closeEverything()
initDocValues()
yield startDocEvent()
continue
p.lexer.documentEndMarker(success)
if success:
closeEverything()
p.lexer.bufpos.inc(3)
p.lexer.lineEnding()
handleLineEnd(false)
state = fpAfterDocument
continue
case p.lexer.buf[p.lexer.bufpos] case p.lexer.buf[p.lexer.bufpos]
of '\l': of '\l':
p.lexer.bufpos = p.lexer.handleLF(p.lexer.bufpos) p.lexer.bufpos = p.lexer.handleLF(p.lexer.bufpos)
state = fpBlockLineStart if level.kind == fplUnknown: level.indentation = UnknownIndentation
level.indentation = UnknownIndentation newlines.inc()
of '\c': of '\c':
p.lexer.bufpos = p.lexer.handleCR(p.lexer.bufpos) p.lexer.bufpos = p.lexer.handleCR(p.lexer.bufpos)
state = fpBlockLineStart if level.kind == fplUnknown: level.indentation = UnknownIndentation
level.indentation = UnknownIndentation newlines.inc()
of EndOfFile: of EndOfFile:
closeEverything() closeEverything()
return return
of '#': of '#':
p.lexer.lineEnding() p.lexer.lineEnding()
handleLineEnd(true) handleLineEnd(true)
state = fpBlockLineStart if level.kind == fplUnknown: level.indentation = UnknownIndentation
level.indentation = UnknownIndentation
of '\'': of '\'':
closeMoreIndentedLevels()
handleBlockItemStart() handleBlockItemStart()
content.setLen(0) content.setLen(0)
startToken() startToken()
@ -1376,6 +1268,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
yieldShallowScalar(content) yieldShallowScalar(content)
handleObjectEnd(fpBlockAfterObject) handleObjectEnd(fpBlockAfterObject)
of '"': of '"':
closeMoreIndentedLevels()
handleBlockItemStart() handleBlockItemStart()
content.setLen(0) content.setLen(0)
startToken() startToken()
@ -1384,6 +1277,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
yieldShallowScalar(content) yieldShallowScalar(content)
handleObjectEnd(fpBlockAfterObject) handleObjectEnd(fpBlockAfterObject)
of '|', '>': of '|', '>':
closeMoreIndentedLevels()
# TODO: this will scan for possible map start, which is not # TODO: this will scan for possible map start, which is not
# necessary in this case # necessary in this case
handleBlockItemStart() handleBlockItemStart()
@ -1399,28 +1293,39 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
closeMoreIndentedLevels() closeMoreIndentedLevels()
of '-': of '-':
if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cBlock): if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cBlock):
closeMoreIndentedLevels()
if level.kind == fplScalar: continueMultilineScalar()
else:
handleBlockItemStart() handleBlockItemStart()
content.setLen(0) content.setLen(0)
startToken() startToken()
p.lexer.plainScalar(content, cBlock) p.lexer.plainScalar(content, cBlock)
state = fpBlockAfterPlainScalar state = fpBlockAfterPlainScalar
else: else:
closeMoreIndentedLevels(true)
p.lexer.bufpos.inc() p.lexer.bufpos.inc()
handleBlockSequenceIndicator() handleBlockSequenceIndicator()
of '!': of '!':
closeMoreIndentedLevels()
handleBlockItemStart() handleBlockItemStart()
handleTagHandle() handleTagHandle()
of '&': of '&':
closeMoreIndentedLevels()
handleBlockItemStart() handleBlockItemStart()
handleAnchor() handleAnchor()
of '*': of '*':
closeMoreIndentedLevels()
handleBlockItemStart() handleBlockItemStart()
handleAlias() handleAlias()
of '[', '{': of '[', '{':
closeMoreIndentedLevels()
handleBlockItemStart() handleBlockItemStart()
state = fpFlow state = fpFlow
of '?': of '?':
closeMoreIndentedLevels()
if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cBlock): if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cBlock):
if level.kind == fplScalar: continueMultilineScalar()
else:
handleBlockItemStart() handleBlockItemStart()
content.setLen(0) content.setLen(0)
startToken() startToken()
@ -1430,7 +1335,10 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
p.lexer.bufpos.inc() p.lexer.bufpos.inc()
handleMapKeyIndicator() handleMapKeyIndicator()
of ':': of ':':
closeMoreIndentedLevels()
if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cBlock): if p.lexer.isPlainSafe(p.lexer.bufpos + 1, cBlock):
if level.kind == fplScalar: continueMultilineScalar()
else:
handleBlockItemStart() handleBlockItemStart()
content.setLen(0) content.setLen(0)
startToken() startToken()
@ -1441,6 +1349,15 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
handleMapValueIndicator() handleMapValueIndicator()
of '@', '`': of '@', '`':
lexerError(p.lexer, "Reserved characters cannot start a plain scalar") lexerError(p.lexer, "Reserved characters cannot start a plain scalar")
of '\t':
closeMoreIndentedLevels()
if level.kind == fplScalar:
p.lexer.skipWhitespace()
continueMultilineScalar()
else: lexerError(p.lexer, "\\t cannot start any token")
else:
closeMoreIndentedLevels()
if level.kind == fplScalar: continueMultilineScalar()
else: else:
handleBlockItemStart() handleBlockItemStart()
content.setLen(0) content.setLen(0)
@ -1451,18 +1368,16 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
debug("state: expectDocEnd") debug("state: expectDocEnd")
case p.lexer.buf[p.lexer.bufpos] case p.lexer.buf[p.lexer.bufpos]
of '-': of '-':
var token: LexedPossibleDirectivesEnd var success: bool
p.lexer.directivesEndMarker(token) p.lexer.directivesEndMarker(success)
case token if success:
of lpdeDirectivesEnd:
p.lexer.bufpos.inc(3) p.lexer.bufpos.inc(3)
yield endDocEvent() yield endDocEvent()
discard ancestry.pop() discard ancestry.pop()
initDocValues() initDocValues()
yield startDocEvent() yield startDocEvent()
state = fpBlockObjectStart state = fpBlockObjectStart
else: else: parserError("Unexpected content (expected document end)")
parserError("Unexpected content (expected document end)")
of '.': of '.':
var isDocumentEnd: bool var isDocumentEnd: bool
startToken() startToken()
@ -1499,7 +1414,7 @@ proc parse*(p: YamlParser, s: Stream): YamlStream =
else: else:
initDocValues() initDocValues()
yield startDocEvent() yield startDocEvent()
state = fpBlockLineStart state = fpBlockObjectStart
of '#': of '#':
p.lexer.lineEnding() p.lexer.lineEnding()
handleLineEnd(false) handleLineEnd(false)