Enhanced empty line processing

Felix Krause 2016-09-11 19:20:02 +02:00
parent b9286fa550
commit a1f900ae44
2 changed files with 57 additions and 8 deletions


@@ -34,12 +34,15 @@ type
     # ltTagHandle
     shorthandEnd*: int
 
+    # may be modified from outside; will be consumed at plain scalar starts
+    newlines*: int
+
     # internals
     source: pointer
     inFlow: bool
     literalEndIndent: int
-    nextState, lineStartState, inlineState, insideLineImpl, insideDocImpl:
-      LexerState
+    nextState, lineStartState, inlineState, insideLineImpl, insideDocImpl,
+      insideFlowImpl: LexerState
     blockScalarIndent: int
     c: char
     tokenLineGetter: proc(lex: YamlLexer): string
@@ -221,6 +224,7 @@ proc possibleDirectivesEnd[T](lex: YamlLexer): bool
 proc possibleDocumentEnd[T](lex: YamlLexer): bool
 proc afterSeqInd[T](lex: YamlLexer): bool
 proc insideDoc[T](lex: YamlLexer): bool {.locks:0.}
+proc insideFlow[T](lex: YamlLexer): bool
 proc insideLine[T](lex: YamlLexer): bool
 proc plainScalarPart[T](lex: YamlLexer): bool
 proc blockScalarHeader[T](lex: YamlLexer): bool
@@ -252,6 +256,13 @@ proc directiveName[T](lex: YamlLexer) =
     lex.buf.add(lex.c)
     lex.advance(T)
 
+proc consumeNewlines(lex: YamlLexer) =
+  case lex.newlines
+  of 0: return
+  of 1: lex.buf.add(' ')
+  else: lex.buf.add(repeat('\l', lex.newlines - 1))
+  lex.newlines = 0
+
 proc yamlVersion[T](lex: YamlLexer): bool =
   debug("lex: yamlVersion")
   while lex.c in space: lex.advance(T)
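For reference, the folding rule that the new consumeNewlines proc applies to the pending newline count can be sketched as a standalone Nim snippet. foldNewlines and the asserts below are illustrative only; the lexer appends directly to lex.buf instead of returning a string.

import strutils

# Same rule as consumeNewlines: zero pending newlines add nothing, a single
# newline folds into one space, and n > 1 newlines keep n - 1 line feeds.
proc foldNewlines(newlines: int): string =
  case newlines
  of 0: result = ""
  of 1: result = " "
  else: result = repeat('\l', newlines - 1)

when isMainModule:
  assert foldNewlines(0) == ""
  assert foldNewlines(1) == " "
  assert foldNewlines(3) == "\l\l"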
@@ -372,13 +383,16 @@ proc possibleDirectivesEnd[T](lex: YamlLexer): bool =
         lex.cur = ltDirectivesEnd
         lex.nextState = insideLine[T]
         return true
+      lex.consumeNewlines()
       lex.buf.add('-')
+    else: lex.consumeNewlines()
     lex.buf.add('-')
   elif lex.c in spaceOrLineEnd:
     lex.indentation = 0
     lex.cur = ltIndentation
     lex.nextState = afterSeqInd[T]
     return true
+  else: lex.consumeNewlines()
   lex.buf.add('-')
   lex.nextState = plainScalarPart[T]
   result = false
@@ -401,8 +415,11 @@ proc possibleDocumentEnd[T](lex: YamlLexer): bool =
         lex.nextState = expectLineEnd[T]
         lex.lineStartState = outsideDoc[T]
         return true
+      lex.consumeNewlines()
       lex.buf.add('.')
+    else: lex.consumeNewlines()
     lex.buf.add('.')
+  else: lex.consumeNewlines()
   lex.buf.add('.')
   lex.nextState = plainScalarPart[T]
   result = false
@@ -472,10 +489,22 @@ proc insideDoc[T](lex: YamlLexer): bool =
     lex.cur = ltIndentation
   result = true
 
+proc insideFlow[T](lex: YamlLexer): bool =
+  debug("lex: insideFlow")
+  startToken[T](lex)
+  while lex.c in space: lex.advance(T)
+  if lex.c in lineEnd + {'#'}:
+    lex.cur = ltEmptyLine
+    lex.nextState = expectLineEnd[T]
+    return true
+  lex.nextState = insideLine[T]
+  result = false
+
 proc possibleIndicatorChar[T](lex: YamlLexer, indicator: LexerToken,
                               jsonContext: bool = false): bool =
   startToken[T](lex)
   if not(jsonContext) and lex.nextIsPlainSafe(T, false):
+    lex.consumeNewlines()
     lex.nextState = plainScalarPart[T]
     result = false
   else:
@@ -640,6 +669,7 @@ proc insideLine[T](lex: YamlLexer): bool =
     result = true
   of '>', '|':
     startToken[T](lex)
+    lex.consumeNewlines()
     if lex.inFlow: lex.nextState = plainScalarPart[T]
     else: lex.nextState = blockScalarHeader[T]
     result = false
@@ -662,6 +692,7 @@ proc insideLine[T](lex: YamlLexer): bool =
         "Reserved characters cannot start a plain scalar")
   else:
     startToken[T](lex)
+    lex.consumeNewlines()
     lex.nextState = plainScalarPart[T]
     result = false
 
@@ -945,6 +976,7 @@ proc init*[T](lex: YamlLexer) =
   lex.inlineState = insideLine[T]
   lex.insideLineImpl = insideLine[T]
   lex.insideDocImpl = insideDoc[T]
+  lex.insideFlowImpl = insideFlow[T]
   lex.tokenLineGetter = tokenLine[T]
   lex.searchColonImpl = searchColon[T]
 
@@ -955,7 +987,7 @@ proc newYamlLexer*(source: Stream): YamlLexer =
       dealloc(x.source)
   )
   result[] = YamlLexerObj(source: blSource, inFlow: false, buf: "",
-      c: blSource[].buf[blSource[].bufpos])
+      c: blSource[].buf[blSource[].bufpos], newlines: 0)
   init[BaseLexer](result)
 
 proc newYamlLexer*(source: string, startAt: int = 0): YamlLexer =
@@ -966,7 +998,7 @@ proc newYamlLexer*(source: string, startAt: int = 0): YamlLexer =
       dealloc(x.source)
   )
   result[] = YamlLexerObj(buf: "", source: sSource, inFlow: false,
-      c: sSource.src[startAt])
+      c: sSource.src[startAt], newlines: 0)
   init[StringSource](result)
 
 proc next*(lex: YamlLexer) =
@@ -978,10 +1010,10 @@ proc setFlow*(lex: YamlLexer, value: bool) =
   # necessary. actually, the lexer will behave wrongly if we do that, because
   # adjacent values need to check if the preceding token was a JSON value, and
   # if indentation tokens are generated, that information is not available.
-  # therefore, we do not use insideDoc in flow mode. another reason is that this
-  # would erratically check for document markers (---, ...) which are simply
-  # scalars in flow mode.
-  if value: lex.lineStartState = lex.insideLineImpl
+  # therefore, we use insideFlow instead of insideDoc in flow mode. another
+  # reason is that this would erratically check for document markers (---, ...)
+  # which are simply scalars in flow mode.
+  if value: lex.lineStartState = lex.insideFlowImpl
   else: lex.lineStartState = lex.insideDocImpl
 
 proc endBlockScalar*(lex: YamlLexer) =
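A minimal usage sketch of the switch this hunk enables; the call sites below are assumed for illustration and are not part of this diff.

# Once setFlow(true) is active, line starts run through insideFlow, so blank
# lines inside a flow collection surface as ltEmptyLine tokens rather than
# indentation tokens.
lex.setFlow(true)    # after the opening '{' or '['
# ... lex the flow content; empty lines yield ltEmptyLine ...
lex.setFlow(false)   # after the matching '}' or ']'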


@@ -227,6 +227,23 @@ suite "Lexer":
       oo(), an("b"), sp("b"), mv(), al("a"), c(), al("b"), mv(), sp("c"),
       oc(), se())
 
+  test "Empty lines":
+    assertEquals("""block: foo
+
+  bar
+
+    baz
+flow: {
+  foo
+
+  bar: baz
+
+
+  mi
+}""", i(0), sp("block"), mv(), sp("foo"), el(), i(2), sp("bar"), el(), i(4),
+      sp("baz"), i(0), sp("flow"), mv(), oo(), sp("foo"), el(), sp("bar"), mv(),
+      sp("baz"), el(), el(), sp("mi"), oc(), se())
+
 suite "Lookahead":
   test "Simple Scalar":
     assertLookahead("abcde", false)