Fixed block scalars in yaml test suite

This commit is contained in:
Felix Krause 2016-09-13 20:19:02 +02:00
parent 451ffb8e95
commit 2a6a103b36
2 changed files with 128 additions and 52 deletions

View File

@ -28,9 +28,6 @@ type
buf*: string not nil buf*: string not nil
# ltIndentation # ltIndentation
indentation*: int indentation*: int
# ltBlockScalarHeader
moreIndented*, folded*: bool
chomp*: ChompType
# ltTagHandle # ltTagHandle
shorthandEnd*: int shorthandEnd*: int
@ -44,6 +41,8 @@ type
nextState, lineStartState, inlineState, insideLineImpl, insideDocImpl, nextState, lineStartState, inlineState, insideLineImpl, insideDocImpl,
insideFlowImpl, outsideDocImpl: LexerState insideFlowImpl, outsideDocImpl: LexerState
blockScalarIndent: int blockScalarIndent: int
folded: bool
chomp: ChompType
c: char c: char
tokenLineGetter: proc(lex: YamlLexer, marker: bool): string {.raises: [].} tokenLineGetter: proc(lex: YamlLexer, marker: bool): string {.raises: [].}
searchColonImpl: proc(lex: YamlLexer): bool searchColonImpl: proc(lex: YamlLexer): bool
@ -242,6 +241,9 @@ proc insideLine[T](lex: YamlLexer): bool
proc plainScalarPart[T](lex: YamlLexer): bool proc plainScalarPart[T](lex: YamlLexer): bool
proc blockScalarHeader[T](lex: YamlLexer): bool proc blockScalarHeader[T](lex: YamlLexer): bool
proc blockScalar[T](lex: YamlLexer): bool proc blockScalar[T](lex: YamlLexer): bool
proc indentationAfterBlockScalar[T](lex: YamlLexer): bool
proc dirEndAfterBlockScalar[T](lex: YamlLexer): bool
proc docEndAfterBlockScalar[T](lex: YamlLexer): bool
proc tagHandle[T](lex: YamlLexer): bool proc tagHandle[T](lex: YamlLexer): bool
proc anchor[T](lex: YamlLexer): bool proc anchor[T](lex: YamlLexer): bool
proc alias[T](lex: YamlLexer): bool proc alias[T](lex: YamlLexer): bool
@ -270,7 +272,7 @@ proc directiveName[T](lex: YamlLexer) =
lex.buf.add(lex.c) lex.buf.add(lex.c)
lex.advance(T) lex.advance(T)
proc consumeNewlines(lex: YamlLexer) {.raises: [].} = proc consumeNewlines(lex: YamlLexer) {.inline, raises: [].} =
case lex.newlines case lex.newlines
of 0: return of 0: return
of 1: lex.buf.add(' ') of 1: lex.buf.add(' ')
@ -397,7 +399,7 @@ proc possibleDirectivesEnd[T](lex: YamlLexer): bool =
if lex.c in spaceOrLineEnd: if lex.c in spaceOrLineEnd:
lex.cur = ltDirectivesEnd lex.cur = ltDirectivesEnd
while lex.c in space: lex.advance(T) while lex.c in space: lex.advance(T)
lex.nextState = insideLine[T] lex.nextState = lex.insideLineImpl
return true return true
lex.consumeNewlines() lex.consumeNewlines()
lex.buf.add('-') lex.buf.add('-')
@ -419,7 +421,7 @@ proc afterSeqInd[T](lex: YamlLexer): bool =
if lex.c notin lineEnd: if lex.c notin lineEnd:
lex.advance(T) lex.advance(T)
while lex.c in space: lex.advance(T) while lex.c in space: lex.advance(T)
lex.nextState = insideLine[T] lex.nextState = lex.insideLineImpl
proc possibleDocumentEnd[T](lex: YamlLexer): bool = proc possibleDocumentEnd[T](lex: YamlLexer): bool =
debug("lex: possibleDocumentEnd") debug("lex: possibleDocumentEnd")
@ -502,8 +504,6 @@ proc insideDoc[T](lex: YamlLexer): bool =
lex.cur = ltEmptyLine lex.cur = ltEmptyLine
lex.nextState = expectLineEnd[T] lex.nextState = expectLineEnd[T]
return true return true
of '\t':
raise generateError[T](lex, "'\\t' cannot start any token")
else: else:
lex.nextState = lex.inlineState lex.nextState = lex.inlineState
else: lex.nextState = lex.inlineState else: lex.nextState = lex.inlineState
@ -771,6 +771,7 @@ proc blockScalarHeader[T](lex: YamlLexer): bool =
lex.chomp = ctClip lex.chomp = ctClip
lex.blockScalarIndent = UnknownIndentation lex.blockScalarIndent = UnknownIndentation
lex.folded = lex.c == '>' lex.folded = lex.c == '>'
startToken[T](lex)
while true: while true:
lex.advance(T) lex.advance(T)
case lex.c case lex.c
@ -792,32 +793,122 @@ proc blockScalarHeader[T](lex: YamlLexer): bool =
"Illegal character in block scalar header: '" & escape("" & lex.c) & "Illegal character in block scalar header: '" & escape("" & lex.c) &
'\'') '\'')
lex.nextState = expectLineEnd[T] lex.nextState = expectLineEnd[T]
lex.inlineState = blockScalar[T] lex.lineStartState = blockScalar[T]
lex.cur = ltBlockScalarHeader result = false
result = true
proc blockScalar[T](lex: YamlLexer): bool = proc blockScalar[T](lex: YamlLexer): bool =
debug("lex: blockScalarLine") debug("lex: blockScalar")
startToken[T](lex) block outer:
result = false if lex.blockScalarIndent == UnknownIndentation:
if lex.blockScalarIndent == UnknownIndentation: while true:
lex.blockScalarIndent = lex.indentation lex.blockScalarIndent = 0
elif lex.c == '#': while lex.c == ' ':
lex.nextState = expectLineEnd[T] lex.blockScalarIndent.inc()
return false lex.advance(T)
elif lex.indentation < lex.blockScalarIndent: case lex.c
raise generateError[T](lex, "Too little indentation in block scalar") of '\l': lex.lexLF(T)
elif lex.indentation > lex.blockScalarIndent or lex.c == '\t': of '\c': lex.lexCR(T)
lex.moreIndented = true of EndOfFile:
lex.buf.addMultiple(' ', lex.indentation - lex.blockScalarIndent) lex.nextState = streamEnd
lex.curStartPos.column -= lex.indentation - lex.blockScalarIndent break outer
else: lex.moreIndented = false else:
while lex.c notin lineEnd: if lex.blockScalarIndent <= lex.indentation:
lex.buf.add(lex.c) lex.indentation = lex.blockScalarIndent
lex.advance(T) lex.nextState = indentationAfterBlockScalar[T]
lex.cur = ltScalarPart break outer
lex.indentation = lex.blockScalarIndent
break
var recentWasMoreIndented = false
while true:
block lineStart:
case lex.c
of '-':
if lex.indentation < lex.blockScalarIndent:
lex.nextState = indentationAfterBlockScalar[T]
break outer
discard possibleDirectivesEnd[T](lex)
case lex.cur
of ltDirectivesEnd:
lex.nextState = dirEndAfterBlockScalar[T]
break outer
of ltIndentation:
if lex.nextState == afterSeqInd[T]:
lex.consumeNewlines()
lex.buf.add("- ")
else: discard
of '.':
if lex.indentation < lex.blockScalarIndent:
lex.nextState = indentationAfterBlockScalar[T]
break outer
if possibleDocumentEnd[T](lex):
lex.nextState = docEndAfterBlockScalar[T]
discard
of spaceOrLineEnd:
while lex.c == ' ' and lex.indentation < lex.blockScalarIndent:
lex.indentation.inc()
lex.advance(T)
case lex.c
of '\l':
lex.newlines.inc()
lex.lexLF(T)
lex.indentation = 0
continue
of '\c':
lex.newlines.inc()
lex.lexCR(T)
lex.indentation = 0
continue
of EndOfFile:
lex.nextState = streamEnd
break outer
of ' ':
recentWasMoreIndented = true
lex.buf.add(repeat('\l', lex.newlines))
lex.newlines = 0
break lineStart
else: discard
else: discard
if lex.indentation < lex.blockScalarIndent:
lex.nextState = indentationAfterBlockScalar[T]
break outer
if lex.folded and not recentWasMoreIndented: lex.consumeNewlines()
else:
recentWasMoreIndented = false
lex.buf.add(repeat('\l', lex.newlines))
lex.newlines = 0
while lex.c notin lineEnd:
lex.buf.add(lex.c)
lex.advance(T)
case lex.chomp
of ctStrip: discard
of ctClip: lex.buf.add('\l')
of ctKeep: lex.buf.add(repeat('\l', lex.newlines))
lex.newlines = 0
lex.lineStartState = insideDoc[T]
lex.cur = ltQuotedScalar
result = true result = true
proc indentationAfterBlockScalar[T](lex: YamlLexer): bool =
if lex.c == '#':
lex.nextState = expectLineEnd[T]
result = false
else:
lex.cur = ltIndentation
result = true
lex.nextState = lex.lineStartState
proc dirEndAfterBlockScalar[T](lex: YamlLexer): bool =
lex.cur = ltDirectivesEnd
while lex.c in space: lex.advance(T)
lex.nextState = lex.insideLineImpl
proc docEndAfterBlockScalar[T](lex: YamlLexer): bool =
lex.cur = ltDocumentEnd
lex.nextState = expectLineEnd[T] lex.nextState = expectLineEnd[T]
lex.lineStartState = lex.outsideDocImpl
proc byteSequence[T](lex: YamlLexer) = proc byteSequence[T](lex: YamlLexer) =
debug("lex: byteSequence") debug("lex: byteSequence")
@ -1009,7 +1100,7 @@ proc newYamlLexer*(source: Stream): YamlLexer =
dealloc(x.source) dealloc(x.source)
) )
result[] = YamlLexerObj(source: blSource, inFlow: false, buf: "", result[] = YamlLexerObj(source: blSource, inFlow: false, buf: "",
c: blSource[].buf[blSource[].bufpos], newlines: 0) c: blSource[].buf[blSource[].bufpos], newlines: 0, folded: true)
init[BaseLexer](result) init[BaseLexer](result)
proc newYamlLexer*(source: string, startAt: int = 0): YamlLexer = proc newYamlLexer*(source: string, startAt: int = 0): YamlLexer =
@ -1020,7 +1111,7 @@ proc newYamlLexer*(source: string, startAt: int = 0): YamlLexer =
dealloc(x.source) dealloc(x.source)
) )
result[] = YamlLexerObj(buf: "", source: sSource, inFlow: false, result[] = YamlLexerObj(buf: "", source: sSource, inFlow: false,
c: sSource.src[startAt], newlines: 0) c: sSource.src[startAt], newlines: 0, folded: true)
init[StringSource](result) init[StringSource](result)
proc next*(lex: YamlLexer) = proc next*(lex: YamlLexer) =
@ -1041,6 +1132,7 @@ proc setFlow*(lex: YamlLexer, value: bool) =
proc endBlockScalar*(lex: YamlLexer) = proc endBlockScalar*(lex: YamlLexer) =
lex.inlineState = lex.insideLineImpl lex.inlineState = lex.insideLineImpl
lex.nextState = lex.insideLineImpl lex.nextState = lex.insideLineImpl
lex.folded = true
proc getTokenLine*(lex: YamlLexer, marker: bool = true): string = proc getTokenLine*(lex: YamlLexer, marker: bool = true): string =
result = lex.tokenLineGetter(lex, marker) result = lex.tokenLineGetter(lex, marker)

View File

@ -262,21 +262,6 @@ proc handleFlowPlainScalar(c: ParserContext) =
c.advance() c.advance()
c.lex.newlines = 0 c.lex.newlines = 0
proc handleBlockScalar(c: ParserContext) =
while true:
c.advance()
case c.lex.cur
of ltScalarPart: discard
of ltEmptyLine: c.lex.newlines.inc()
of ltIndentation:
if c.lex.indentation <= c.ancestry[^1].indentation:
# TODO: handle clipping
break
of ltStreamEnd, ltDirectivesEnd, ltDocumentEnd:
# TODO: handle clipping
break
else: internalError("Unexpected token: " & $c.lex.cur)
# --- macros for defining parser states --- # --- macros for defining parser states ---
template capitalize(s: string): string = template capitalize(s: string): string =
@ -467,7 +452,9 @@ parserState initial:
c.callCallback("Unknown directive: " & c.lex.buf) c.callCallback("Unknown directive: " & c.lex.buf)
c.lex.buf.setLen(0) c.lex.buf.setLen(0)
c.advance() c.advance()
assert c.lex.cur == ltUnknownDirectiveParams if c.lex.cur == ltUnknownDirectiveParams:
c.lex.buf.setLen(0)
c.advance()
of ltIndentation: of ltIndentation:
e = startDocEvent() e = startDocEvent()
result = true result = true
@ -499,6 +486,7 @@ parserState blockObjectStart:
of ltEmptyLine: c.advance() of ltEmptyLine: c.advance()
of ltIndentation: of ltIndentation:
c.advance() c.advance()
c.level.indentation = UnknownIndentation
state = blockLineStart state = blockLineStart
of ltDirectivesEnd: of ltDirectivesEnd:
c.closeEverything() c.closeEverything()
@ -514,10 +502,6 @@ parserState blockObjectStart:
result = c.handleBlockItemStart(e) result = c.handleBlockItemStart(e)
c.advance() c.advance()
state = scalarEnd state = scalarEnd
of ltBlockScalarHeader:
result = c.handleBlockItemStart(e)
c.handleBlockScalar()
state = scalarEnd
of ltScalarPart: of ltScalarPart:
result = c.handleBlockItemStart(e) result = c.handleBlockItemStart(e)
while true: while true: