mirror of https://github.com/status-im/NimYAML.git
Enhanced empty line processing
This commit is contained in:
parent
b9286fa550
commit
a1f900ae44
|
@ -34,12 +34,15 @@ type
|
||||||
# ltTagHandle
|
# ltTagHandle
|
||||||
shorthandEnd*: int
|
shorthandEnd*: int
|
||||||
|
|
||||||
|
# may be modified from outside; will be consumed at plain scalar starts
|
||||||
|
newlines*: int
|
||||||
|
|
||||||
# internals
|
# internals
|
||||||
source: pointer
|
source: pointer
|
||||||
inFlow: bool
|
inFlow: bool
|
||||||
literalEndIndent: int
|
literalEndIndent: int
|
||||||
nextState, lineStartState, inlineState, insideLineImpl, insideDocImpl:
|
nextState, lineStartState, inlineState, insideLineImpl, insideDocImpl,
|
||||||
LexerState
|
insideFlowImpl: LexerState
|
||||||
blockScalarIndent: int
|
blockScalarIndent: int
|
||||||
c: char
|
c: char
|
||||||
tokenLineGetter: proc(lex: YamlLexer): string
|
tokenLineGetter: proc(lex: YamlLexer): string
|
||||||
|
@ -221,6 +224,7 @@ proc possibleDirectivesEnd[T](lex: YamlLexer): bool
|
||||||
proc possibleDocumentEnd[T](lex: YamlLexer): bool
|
proc possibleDocumentEnd[T](lex: YamlLexer): bool
|
||||||
proc afterSeqInd[T](lex: YamlLexer): bool
|
proc afterSeqInd[T](lex: YamlLexer): bool
|
||||||
proc insideDoc[T](lex: YamlLexer): bool {.locks:0.}
|
proc insideDoc[T](lex: YamlLexer): bool {.locks:0.}
|
||||||
|
proc insideFlow[T](lex: YamlLexer): bool
|
||||||
proc insideLine[T](lex: YamlLexer): bool
|
proc insideLine[T](lex: YamlLexer): bool
|
||||||
proc plainScalarPart[T](lex: YamlLexer): bool
|
proc plainScalarPart[T](lex: YamlLexer): bool
|
||||||
proc blockScalarHeader[T](lex: YamlLexer): bool
|
proc blockScalarHeader[T](lex: YamlLexer): bool
|
||||||
|
@ -252,6 +256,13 @@ proc directiveName[T](lex: YamlLexer) =
|
||||||
lex.buf.add(lex.c)
|
lex.buf.add(lex.c)
|
||||||
lex.advance(T)
|
lex.advance(T)
|
||||||
|
|
||||||
|
proc consumeNewlines(lex: YamlLexer) =
  ## Flushes the pending line-break counter `lex.newlines` into `lex.buf`
  ## and resets the counter to zero.
  ##
  ## * 0 pending breaks: nothing is appended.
  ## * 1 pending break: a single space is appended.
  ## * n > 1 pending breaks: `n - 1` line feed characters are appended.
  ##
  ## NOTE(review): this looks like YAML line folding for multi-line plain
  ## scalars -- confirm against the callers that start a plain scalar.
  let pending = lex.newlines
  if pending == 0:
    # nothing buffered, and the counter is already zero
    return
  if pending == 1:
    lex.buf.add(' ')
  else:
    lex.buf.add(repeat('\l', pending - 1))
  lex.newlines = 0
|
||||||
|
|
||||||
proc yamlVersion[T](lex: YamlLexer): bool =
|
proc yamlVersion[T](lex: YamlLexer): bool =
|
||||||
debug("lex: yamlVersion")
|
debug("lex: yamlVersion")
|
||||||
while lex.c in space: lex.advance(T)
|
while lex.c in space: lex.advance(T)
|
||||||
|
@ -372,13 +383,16 @@ proc possibleDirectivesEnd[T](lex: YamlLexer): bool =
|
||||||
lex.cur = ltDirectivesEnd
|
lex.cur = ltDirectivesEnd
|
||||||
lex.nextState = insideLine[T]
|
lex.nextState = insideLine[T]
|
||||||
return true
|
return true
|
||||||
|
lex.consumeNewlines()
|
||||||
lex.buf.add('-')
|
lex.buf.add('-')
|
||||||
|
else: lex.consumeNewlines()
|
||||||
lex.buf.add('-')
|
lex.buf.add('-')
|
||||||
elif lex.c in spaceOrLineEnd:
|
elif lex.c in spaceOrLineEnd:
|
||||||
lex.indentation = 0
|
lex.indentation = 0
|
||||||
lex.cur = ltIndentation
|
lex.cur = ltIndentation
|
||||||
lex.nextState = afterSeqInd[T]
|
lex.nextState = afterSeqInd[T]
|
||||||
return true
|
return true
|
||||||
|
else: lex.consumeNewlines()
|
||||||
lex.buf.add('-')
|
lex.buf.add('-')
|
||||||
lex.nextState = plainScalarPart[T]
|
lex.nextState = plainScalarPart[T]
|
||||||
result = false
|
result = false
|
||||||
|
@ -401,8 +415,11 @@ proc possibleDocumentEnd[T](lex: YamlLexer): bool =
|
||||||
lex.nextState = expectLineEnd[T]
|
lex.nextState = expectLineEnd[T]
|
||||||
lex.lineStartState = outsideDoc[T]
|
lex.lineStartState = outsideDoc[T]
|
||||||
return true
|
return true
|
||||||
|
lex.consumeNewlines()
|
||||||
lex.buf.add('.')
|
lex.buf.add('.')
|
||||||
|
else: lex.consumeNewlines()
|
||||||
lex.buf.add('.')
|
lex.buf.add('.')
|
||||||
|
else: lex.consumeNewlines()
|
||||||
lex.buf.add('.')
|
lex.buf.add('.')
|
||||||
lex.nextState = plainScalarPart[T]
|
lex.nextState = plainScalarPart[T]
|
||||||
result = false
|
result = false
|
||||||
|
@ -472,10 +489,22 @@ proc insideDoc[T](lex: YamlLexer): bool =
|
||||||
lex.cur = ltIndentation
|
lex.cur = ltIndentation
|
||||||
result = true
|
result = true
|
||||||
|
|
||||||
|
proc insideFlow[T](lex: YamlLexer): bool =
  ## Lexer state that skips leading spaces and then classifies the rest of
  ## the line.
  ##
  ## If the first non-space character is a line end or `#`, the current
  ## token becomes `ltEmptyLine`, the next state is `expectLineEnd` and the
  ## proc returns `true`. Otherwise the next state is `insideLine` and the
  ## proc returns `false`.
  ##
  ## NOTE(review): presumably `true` means "a token was produced", as in the
  ## other state procs -- confirm against the state driver.
  debug("lex: insideFlow")
  startToken[T](lex)
  while lex.c in space:
    lex.advance(T)
  # Only whitespace and/or a comment on this line counts as an empty line.
  let blankLine = lex.c in lineEnd + {'#'}
  if blankLine:
    lex.cur = ltEmptyLine
    lex.nextState = expectLineEnd[T]
  else:
    lex.nextState = insideLine[T]
  result = blankLine
|
||||||
|
|
||||||
proc possibleIndicatorChar[T](lex: YamlLexer, indicator: LexerToken,
|
proc possibleIndicatorChar[T](lex: YamlLexer, indicator: LexerToken,
|
||||||
jsonContext: bool = false): bool =
|
jsonContext: bool = false): bool =
|
||||||
startToken[T](lex)
|
startToken[T](lex)
|
||||||
if not(jsonContext) and lex.nextIsPlainSafe(T, false):
|
if not(jsonContext) and lex.nextIsPlainSafe(T, false):
|
||||||
|
lex.consumeNewlines()
|
||||||
lex.nextState = plainScalarPart[T]
|
lex.nextState = plainScalarPart[T]
|
||||||
result = false
|
result = false
|
||||||
else:
|
else:
|
||||||
|
@ -640,6 +669,7 @@ proc insideLine[T](lex: YamlLexer): bool =
|
||||||
result = true
|
result = true
|
||||||
of '>', '|':
|
of '>', '|':
|
||||||
startToken[T](lex)
|
startToken[T](lex)
|
||||||
|
lex.consumeNewlines()
|
||||||
if lex.inFlow: lex.nextState = plainScalarPart[T]
|
if lex.inFlow: lex.nextState = plainScalarPart[T]
|
||||||
else: lex.nextState = blockScalarHeader[T]
|
else: lex.nextState = blockScalarHeader[T]
|
||||||
result = false
|
result = false
|
||||||
|
@ -662,6 +692,7 @@ proc insideLine[T](lex: YamlLexer): bool =
|
||||||
"Reserved characters cannot start a plain scalar")
|
"Reserved characters cannot start a plain scalar")
|
||||||
else:
|
else:
|
||||||
startToken[T](lex)
|
startToken[T](lex)
|
||||||
|
lex.consumeNewlines()
|
||||||
lex.nextState = plainScalarPart[T]
|
lex.nextState = plainScalarPart[T]
|
||||||
result = false
|
result = false
|
||||||
|
|
||||||
|
@ -945,6 +976,7 @@ proc init*[T](lex: YamlLexer) =
|
||||||
lex.inlineState = insideLine[T]
|
lex.inlineState = insideLine[T]
|
||||||
lex.insideLineImpl = insideLine[T]
|
lex.insideLineImpl = insideLine[T]
|
||||||
lex.insideDocImpl = insideDoc[T]
|
lex.insideDocImpl = insideDoc[T]
|
||||||
|
lex.insideFlowImpl = insideFlow[T]
|
||||||
lex.tokenLineGetter = tokenLine[T]
|
lex.tokenLineGetter = tokenLine[T]
|
||||||
lex.searchColonImpl = searchColon[T]
|
lex.searchColonImpl = searchColon[T]
|
||||||
|
|
||||||
|
@ -955,7 +987,7 @@ proc newYamlLexer*(source: Stream): YamlLexer =
|
||||||
dealloc(x.source)
|
dealloc(x.source)
|
||||||
)
|
)
|
||||||
result[] = YamlLexerObj(source: blSource, inFlow: false, buf: "",
|
result[] = YamlLexerObj(source: blSource, inFlow: false, buf: "",
|
||||||
c: blSource[].buf[blSource[].bufpos])
|
c: blSource[].buf[blSource[].bufpos], newlines: 0)
|
||||||
init[BaseLexer](result)
|
init[BaseLexer](result)
|
||||||
|
|
||||||
proc newYamlLexer*(source: string, startAt: int = 0): YamlLexer =
|
proc newYamlLexer*(source: string, startAt: int = 0): YamlLexer =
|
||||||
|
@ -966,7 +998,7 @@ proc newYamlLexer*(source: string, startAt: int = 0): YamlLexer =
|
||||||
dealloc(x.source)
|
dealloc(x.source)
|
||||||
)
|
)
|
||||||
result[] = YamlLexerObj(buf: "", source: sSource, inFlow: false,
|
result[] = YamlLexerObj(buf: "", source: sSource, inFlow: false,
|
||||||
c: sSource.src[startAt])
|
c: sSource.src[startAt], newlines: 0)
|
||||||
init[StringSource](result)
|
init[StringSource](result)
|
||||||
|
|
||||||
proc next*(lex: YamlLexer) =
|
proc next*(lex: YamlLexer) =
|
||||||
|
@ -978,10 +1010,10 @@ proc setFlow*(lex: YamlLexer, value: bool) =
|
||||||
# necessary. actually, the lexer will behave wrongly if we do that, because
|
# necessary. actually, the lexer will behave wrongly if we do that, because
|
||||||
# adjacent values need to check if the preceding token was a JSON value, and
|
# adjacent values need to check if the preceding token was a JSON value, and
|
||||||
# if indentation tokens are generated, that information is not available.
|
# if indentation tokens are generated, that information is not available.
|
||||||
# therefore, we do not use insideDoc in flow mode. another reason is that this
|
# therefore, we use insideFlow instead of insideDoc in flow mode. another
|
||||||
# would erratically check for document markers (---, ...) which are simply
|
# reason is that insideDoc would erroneously check for document markers (---, ...)
|
||||||
# scalars in flow mode.
|
# which are simply scalars in flow mode.
|
||||||
if value: lex.lineStartState = lex.insideLineImpl
|
if value: lex.lineStartState = lex.insideFlowImpl
|
||||||
else: lex.lineStartState = lex.insideDocImpl
|
else: lex.lineStartState = lex.insideDocImpl
|
||||||
|
|
||||||
proc endBlockScalar*(lex: YamlLexer) =
|
proc endBlockScalar*(lex: YamlLexer) =
|
||||||
|
|
|
@ -227,6 +227,23 @@ suite "Lexer":
|
||||||
oo(), an("b"), sp("b"), mv(), al("a"), c(), al("b"), mv(), sp("c"),
|
oo(), an("b"), sp("b"), mv(), al("a"), c(), al("b"), mv(), sp("c"),
|
||||||
oc(), se())
|
oc(), se())
|
||||||
|
|
||||||
|
test "Empty lines":
|
||||||
|
assertEquals("""block: foo
|
||||||
|
|
||||||
|
bar
|
||||||
|
|
||||||
|
baz
|
||||||
|
flow: {
|
||||||
|
foo
|
||||||
|
|
||||||
|
bar: baz
|
||||||
|
|
||||||
|
|
||||||
|
mi
|
||||||
|
}""", i(0), sp("block"), mv(), sp("foo"), el(), i(2), sp("bar"), el(), i(4),
|
||||||
|
sp("baz"), i(0), sp("flow"), mv(), oo(), sp("foo"), el(), sp("bar"), mv(),
|
||||||
|
sp("baz"), el(), el(), sp("mi"), oc(), se())
|
||||||
|
|
||||||
suite "Lookahead":
|
suite "Lookahead":
|
||||||
test "Simple Scalar":
|
test "Simple Scalar":
|
||||||
assertLookahead("abcde", false)
|
assertLookahead("abcde", false)
|
||||||
|
|
Loading…
Reference in New Issue