Restructured source code

This commit is contained in:
Felix Krause 2015-12-23 12:35:07 +01:00
parent 5c0e9f570b
commit ffcbbf89e6
5 changed files with 487 additions and 469 deletions

View File

@ -1,7 +1,7 @@
import lexbase, unicode, streams
# file must be included from yaml.nim and cannot compile on its own
type
Encoding* = enum
Encoding = enum
Unsupported, ## Unsupported encoding
UTF8, ## UTF-8
UTF16LE, ## UTF-16 Little Endian
@ -9,31 +9,30 @@ type
UTF32LE, ## UTF-32 Little Endian
UTF32BE ## UTF-32 Big Endian
YamlLexerToken* = enum
YamlLexerToken = enum
# separating tokens
yamlDirectivesEnd, yamlDocumentEnd, yamlStreamEnd,
tDirectivesEnd, tDocumentEnd, tStreamEnd,
# tokens only in directives
yamlTagDirective, yamlYamlDirective, yamlUnknownDirective,
yamlVersionPart, yamlTagURI,
yamlUnknownDirectiveParam,
tTagDirective, tYamlDirective, tUnknownDirective,
tVersionPart, tTagURI,
tUnknownDirectiveParam,
# tokens in directives and content
yamlTagHandle, yamlComment,
tTagHandle, tComment,
# from here on tokens only in content
yamlLineStart,
tLineStart,
# control characters
yamlColon, yamlDash, yamlQuestionmark, yamlComma, yamlOpeningBrace,
yamlOpeningBracket, yamlClosingBrace, yamlClosingBracket, yamlPipe,
yamlGreater,
tColon, tDash, tQuestionmark, tComma, tOpeningBrace,
tOpeningBracket, tClosingBrace, tClosingBracket, tPipe, tGreater,
# block scalar header
yamlBlockIndentationIndicator, yamlPlus,
tBlockIndentationIndicator, tPlus,
# scalar content
yamlScalar, yamlScalarPart,
tScalar, tScalarPart,
# tags
yamlVerbatimTag, yamlTagSuffix,
tVerbatimTag, tTagSuffix,
# anchoring
yamlAnchor, yamlAlias,
tAnchor, tAlias,
# error reporting
yamlError
tError
YamlLexerState = enum
# initial states (not started reading any token)
@ -58,22 +57,19 @@ type
# anchoring
ylAnchor, ylAlias
YamlLexerTypeHintState = enum
YamlTypeHintState = enum
ythInitial, ythN, ythNU, ythNUL, ythNULL, ythF, ythFA, ythFAL, ythFALS,
ythFALSE, ythT, ythTR, ythTRU, ythTRUE, ythMinus, yth0, ythInt,
ythDecimal, ythNumE, ythNumEPlusMinus, ythExponent, ythNone
YamlLexerTypeHint* = enum
yTypeInteger, yTypeFloat, yTypeBoolean, yTypeNull, yTypeString
YamlLexer* = object of BaseLexer
YamlLexer = object of BaseLexer
indentations: seq[int]
encoding: Encoding
charlen: int
charoffset: int
content*: string # my.content of the last returned token.
line*, column*: int
typeHint*: YamlLexerTypeHint
typeHint*: YamlTypeHint
const
UTF8NextLine = toUTF8(Rune(0x85))
@ -140,7 +136,7 @@ proc detect_encoding(my: var YamlLexer) =
of UTF16BE: 1
of UTF32BE: 3
proc open*(my: var YamlLexer, input: Stream) =
proc open(my: var YamlLexer, input: Stream) =
lexbase.open(my, input)
my.indentations = newSeq[int]()
my.detect_encoding()
@ -150,8 +146,8 @@ proc open*(my: var YamlLexer, input: Stream) =
template yieldToken(kind: YamlLexerToken) {.dirty.} =
when defined(yamlDebug):
if kind == yamlScalar:
echo "Lexer token: yamlScalar(\"", my.content, "\")"
if kind == tScalar:
echo "Lexer token: tScalar(\"", my.content, "\")"
else:
echo "Lexer token: ", kind
yield kind
@ -171,16 +167,16 @@ template yieldScalarPart() {.dirty.} =
my.typeHint = yTypeString
when defined(yamlDebug):
echo "Lexer token: yamlScalarPart(\"", my.content, "\".", my.typeHint,
echo "Lexer token: tScalarPart(\"", my.content, "\".", my.typeHint,
")"
yield yamlScalarPart
yield tScalarPart
my.content = ""
template yieldError(message: string) {.dirty.} =
template yieldLexerError(message: string) {.dirty.} =
when defined(yamlDebug):
echo "Lexer error: " & message
my.content = message
yield yamlError
yield tError
my.content = ""
template handleCR() {.dirty.} =
@ -330,7 +326,7 @@ template advanceTypeHint(ch: char) {.dirty.} =
state = ylPlainScalarNone
iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
iterator tokens(my: var YamlLexer): YamlLexerToken {.closure.} =
var
# the following three values are used for parsing escaped unicode chars
@ -393,7 +389,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
state = ylDashes
continue
of '.':
yieldToken(yamlLineStart)
yieldToken(tLineStart)
my.column = 0
state = ylDots
continue
@ -407,17 +403,17 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ' ', '\t', '\r', '\x0A', EndOfFile:
case my.content.len
of 3:
yieldToken(yamlDirectivesEnd)
yieldToken(tDirectivesEnd)
state = ylInitialInLine
of 1:
my.content = ""
yieldToken(yamlLineStart)
yieldToken(tLineStart)
lastSpecialChar = '-'
state = ylInitialInLine
else:
let tmp = my.content
my.content = ""
yieldToken(yamlLineStart)
yieldToken(tLineStart)
my.content = tmp
my.column = curPos
state = ylPlainScalarNone
@ -426,7 +422,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
else:
let tmp = my.content
my.content = ""
yieldToken(yamlLineStart)
yieldToken(tLineStart)
my.content = tmp
if my.content.len == 1:
typeHintState = ythMinus
@ -442,7 +438,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ' ', '\t', '\r', '\x0A', EndOfFile:
case my.content.len
of 3:
yieldToken(yamlDocumentEnd)
yieldToken(tDocumentEnd)
state = ylDirectiveLineEnd
else:
state = ylPlainScalarNone
@ -463,14 +459,14 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
state = ylInitial
continue
of EndOfFile:
yieldToken(yamlStreamEnd)
yieldToken(tStreamEnd)
break
of ' ', '\t':
discard
of '#':
state = ylDirectiveComment
else:
yieldError("Unexpected content at end of directive: " & c)
yieldLexerError("Unexpected content at end of directive: " & c)
of ylLineEnd:
case c
of '\r':
@ -478,10 +474,10 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of '\x0A':
handleLF()
of EndOfFile:
yieldToken(yamlStreamEnd)
yieldToken(tStreamEnd)
break
else:
yieldError("Internal error: Unexpected char at line end: " & c)
yieldLexerError("Internal error: Unexpected char at line end: " & c)
state = ylInitialContent
continue
of ylSingleQuotedScalar:
@ -492,12 +488,12 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content.add(c)
lastSpecialChar = '\0'
of EndOfFile, '\r', '\x0A':
yieldToken(yamlScalar)
yieldToken(tScalar)
lastSpecialChar = '\0'
state = ylLineEnd
continue
else:
yieldToken(yamlScalar)
yieldToken(tScalar)
lastSpecialChar = '\0'
state = ylSpaceAfterQuotedScalar
continue
@ -506,19 +502,19 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of '\'':
lastSpecialChar = c
of EndOfFile:
yieldError("Unterminated single quoted string")
yieldToken(yamlStreamEnd)
yieldLexerError("Unterminated single quoted string")
yieldToken(tStreamEnd)
break
else:
my.content.add(c)
of ylDoublyQuotedScalar:
case c
of '"':
yieldToken(yamlScalar)
yieldToken(tScalar)
state = ylSpaceAfterQuotedScalar
of EndOfFile:
yieldError("Unterminated doubly quoted string")
yieldToken(yamlStreamEnd)
yieldLexerError("Unterminated doubly quoted string")
yieldToken(tStreamEnd)
break
of '\\':
state = ylEscape
@ -536,7 +532,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
expectedEscapeLength = 0
case c
of EndOfFile:
yieldError("Unterminated doubly quoted string")
yieldLexerError("Unterminated doubly quoted string")
of '0': my.content.add('\0')
of 'a': my.content.add('\x07')
of 'b': my.content.add('\x08')
@ -558,13 +554,13 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of 'u': unicodeChar = cast[Rune](0); expectedEscapeLength = 5
of 'U': unicodeChar = cast[Rune](0); expectedEscapeLength = 9
else:
yieldError("Unsupported escape sequence: \\" & c)
yieldLexerError("Unsupported escape sequence: \\" & c)
if expectedEscapeLength == 0: state = ylDoublyQuotedScalar
else:
let digitPosition = expectedEscapeLength - escapeLength - 1
case c
of EndOFFile:
yieldError("Unterminated escape sequence")
yieldLexerError("Unterminated escape sequence")
state = ylLineEnd
continue
of '0' .. '9':
@ -577,7 +573,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
unicodeChar = unicodechar or
(cast[int](c) - 0x57) shl (digitPosition * 4)
else:
yieldError("unsupported char in unicode escape sequence: " &
yieldLexerError("unsupported char in unicode escape sequence: " &
c)
escapeLength = 0
state = ylDoublyQuotedScalar
@ -593,7 +589,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
trailingSpace.add(c)
of '#':
if trailingSpace.len > 0:
yieldError("Missing space before comment start")
yieldLexerError("Missing space before comment start")
state = ylComment
trailingSpace = ""
else:
@ -689,7 +685,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ':', '#':
lastSpecialChar = c
of '[', ']', '{', '}':
yieldToken(yamlScalar)
yieldToken(tScalar)
trailingSpace = ""
state = ylInitialInLine
continue
@ -710,20 +706,20 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.content = "#"
state = ylComment
of ':':
yieldToken(yamlColon)
yieldToken(tColon)
of '?':
yieldToken(yamlQuestionmark)
yieldToken(tQuestionmark)
of '-':
yieldToken(yamlDash)
yieldToken(tDash)
of ',':
yieldToken(yamlComma)
yieldToken(tComma)
of '!':
my.content = "!"
yieldToken(yamlTagHandle)
yieldToken(tTagHandle)
my.content = ""
yieldToken(yamlTagSuffix)
yieldToken(tTagSuffix)
else:
yieldError("Unexpected special char: \"" &
yieldLexerError("Unexpected special char: \"" &
lastSpecialChar & "\"")
lastSpecialChar = '\0'
elif lastSpecialChar == '!':
@ -750,7 +746,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
continue
of ',':
if flowDepth > 0:
yieldToken(yamlComma)
yieldToken(tComma)
else:
my.content = "" & c
my.column = curPos
@ -759,16 +755,16 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
advanceTypeHint(c)
of '[':
inc(flowDepth)
yieldToken(yamlOpeningBracket)
yieldToken(tOpeningBracket)
of '{':
inc(flowDepth)
yieldToken(yamlOpeningBrace)
yieldToken(tOpeningBrace)
of ']':
yieldToken(yamlClosingBracket)
yieldToken(tClosingBracket)
if flowDepth > 0:
inc(flowDepth, -1)
of '}':
yieldToken(yamlClosingBrace)
yieldToken(tClosingBrace)
if flowDepth > 0:
inc(flowDepth, -1)
of '#':
@ -803,10 +799,10 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
my.column = curPos
lastSpecialChar = c
of '|':
yieldToken(yamlPipe)
yieldToken(tPipe)
state = ylBlockScalarHeader
of '>':
yieldToken(yamlGreater)
yieldToken(tGreater)
state = ylBlockScalarHeader
of '\t':
discard
@ -819,14 +815,14 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylComment, ylDirectiveComment:
case c
of EndOfFile, '\r', '\x0A':
yieldToken(yamlComment)
yieldToken(tComment)
case state
of ylComment:
state = ylLineEnd
of ylDirectiveComment:
state = ylDirectiveLineEnd
else:
yieldError("Should never happen")
yieldLexerError("Should never happen")
continue
else:
my.content.add(c)
@ -834,14 +830,14 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
case c
of EndOfFile, '\r', '\x0A':
lastIndentationLength = my.content.len
yieldToken(yamlLineStart)
yieldToken(tLineStart)
state = ylLineEnd
continue
of ' ':
my.content.add(' ')
else:
lastIndentationLength = my.content.len
yieldToken(yamlLineStart)
yieldToken(tLineStart)
if blockScalarIndentation != -1:
if lastIndentationLength <= blockScalarIndentation:
blockScalarIndentation = -1
@ -854,20 +850,20 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
case c
of '!':
my.content.add(c)
yieldToken(yamlTagHandle)
yieldToken(tTagHandle)
state = ylTagSuffix
of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-':
my.content.add(c)
of ' ', '\t', EndOfFile, '\r', '\x0A':
var suffix = my.content[1..^1]
my.content = "!"
yieldToken(yamlTagHandle)
yieldToken(tTagHandle)
my.content = suffix
yieldToken(yamlTagSuffix)
yieldToken(tTagSuffix)
state = ylInitialInLine
continue
else:
yieldError("Invalid character in tag handle: " & c)
yieldLexerError("Invalid character in tag handle: " & c)
my.content = ""
state = ylInitialInLine
of ylTagSuffix:
@ -876,11 +872,11 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
'&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
my.content.add(c)
of ' ', '\t', EndOfFile, '\r', '\x0A':
yieldToken(yamlTagSuffix)
yieldToken(tTagSuffix)
state = ylInitialInLine
continue
else:
yieldError("Invalid character in tag suffix: " & c)
yieldLexerError("Invalid character in tag suffix: " & c)
state = ylInitialInLine
of ylVerbatimTag:
case c
@ -888,27 +884,27 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
'&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
my.content.add(c)
of '>':
yieldToken(yamlVerbatimTag)
yieldToken(tVerbatimTag)
state = ylInitialInLine
of EndOfFile, '\r', '\x0A':
yieldError("Unfinished verbatim tag")
yieldLexerError("Unfinished verbatim tag")
state = ylLineEnd
continue
else:
yieldError("Invalid character in tag URI: " & c)
yieldLexerError("Invalid character in tag URI: " & c)
my.content = ""
state = ylInitialInLine
of ylDirective:
case c
of ' ', '\t', '\r', '\x0A', EndOfFile:
if my.content == "%YAML":
yieldToken(yamlYamlDirective)
yieldToken(tYamlDirective)
state = ylMajorVersion
elif my.content == "%TAG":
yieldToken(yamlTagDirective)
yieldToken(tTagDirective)
state = ylDefineTagHandleInitial
else:
yieldToken(yamlUnknownDirective)
yieldToken(tUnknownDirective)
state = ylInitialUnknown
if c == EndOfFile:
continue
@ -930,7 +926,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylUnknownDirectiveParam:
case c
of '\r', '\x0A', EndOfFile, ' ', '\t':
yieldToken(yamlUnknownDirectiveParam)
yieldToken(tUnknownDirectiveParam)
state = ylInitialUnknown
continue
else:
@ -940,66 +936,66 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of '0' .. '9':
my.content.add(c)
of '.':
yieldToken(yamlVersionPart)
yieldToken(tVersionPart)
state = ylMinorVersion
of EndOfFile, '\r', '\x0A', ' ', '\t':
yieldError("Missing YAML minor version.")
yieldLexerError("Missing YAML minor version.")
state = ylDirectiveLineEnd
continue
else:
yieldError("Invalid character in YAML version: " & c)
yieldLexerError("Invalid character in YAML version: " & c)
state = ylInitialUnknown
of ylMinorVersion:
case c
of '0' .. '9':
my.content.add(c)
of EndOfFile, '\r', '\x0A', ' ', '\t':
yieldToken(yamlVersionPart)
yieldToken(tVersionPart)
state = ylDirectiveLineEnd
continue
else:
yieldError("Invalid character in YAML version: " & c)
yieldLexerError("Invalid character in YAML version: " & c)
state = ylInitialUnknown
of ylDefineTagHandleInitial:
case c
of ' ', '\t':
discard
of EndOfFile, '\r', '\x0A':
yieldError("Unfinished %TAG directive")
yieldLexerError("Unfinished %TAG directive")
state = ylDirectiveLineEnd
continue
of '!':
my.content.add(c)
state = ylDefineTagHandle
else:
yieldError("Unexpected character in %TAG directive: " & c)
yieldLexerError("Unexpected character in %TAG directive: " & c)
state = ylInitialInLine
of ylDefineTagHandle:
case c
of '!':
my.content.add(c)
yieldToken(yamlTagHandle)
yieldToken(tTagHandle)
state = ylDefineTagURIInitial
of 'a' .. 'z', 'A' .. 'Z', '-':
my.content.add(c)
of EndOfFile, '\r', '\x0A':
yieldError("Unfinished %TAG directive")
yieldLexerError("Unfinished %TAG directive")
state = ylDirectiveLineEnd
continue
else:
yieldError("Unexpected char in %TAG directive: " & c)
yieldLexerError("Unexpected char in %TAG directive: " & c)
state = ylInitialInLine
of ylDefineTagURIInitial:
case c
of '\t', ' ':
my.content.add(c)
of '\x0A', '\r', EndOfFile:
yieldError("Unfinished %TAG directive")
yieldLexerError("Unfinished %TAG directive")
state = ylDirectiveLineEnd
continue
else:
if my.content.len == 0:
yieldError("Missing whitespace in %TAG directive")
yieldLexerError("Missing whitespace in %TAG directive")
my.content = ""
state = ylDefineTagURI
continue
@ -1009,28 +1005,28 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
'&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')':
my.content.add(c)
of '\x0A', '\r', EndOfFile, ' ', '\t':
yieldToken(yamlTagURI)
yieldToken(tTagURI)
state = ylDirectiveLineEnd
continue
else:
yieldError("Invalid URI character: " & c)
yieldLexerError("Invalid URI character: " & c)
state = ylInitialInLine
continue
of ylBlockScalarHeader:
case c
of '0' .. '9':
my.content = "" & c
yieldToken(yamlBlockIndentationIndicator)
yieldToken(tBlockIndentationIndicator)
of '+':
yieldToken(yamlPlus)
yieldToken(tPlus)
of '-':
yieldToken(yamlDash)
yieldToken(tDash)
of '\r', '\x0A', EndOfFile:
blockScalarIndentation = lastIndentationLength
state = ylLineEnd
continue
else:
yieldError("Unexpected character in block scalar header: " & c)
yieldLexerError("Unexpected character in block scalar header: " & c)
of ylBlockScalar:
case c
of EndOfFile, '\r', '\x0A':
@ -1042,7 +1038,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
of ylAnchor:
case c
of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
yieldToken(yamlAnchor)
yieldToken(tAnchor)
state = ylInitialInLine
continue
else:
@ -1051,7 +1047,7 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
if lastSpecialChar != '\0':
case c
of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
yieldToken(yamlAlias)
yieldToken(tAlias)
state = ylInitialInLine
continue
else:
@ -1059,14 +1055,14 @@ iterator tokens*(my: var YamlLexer): YamlLexerToken {.closure.} =
lastSpecialChar = '\0'
case c
of EndOfFile, '\r', '\x0A', ' ', '\t', '{', '}', '[', ']':
yieldToken(yamlAlias)
yieldToken(tAlias)
state = ylInitialInLine
continue
of ':':
lastSpecialChar = ':'
of ',':
if flowDepth > 0:
yieldToken(yamlAlias)
yieldToken(tAlias)
state = ylInitialInLine
continue
my.content.add(c)

View File

@ -1,41 +1,13 @@
import streams, tables, strutils
import "private/lexer"
# file must be included from yaml.nim and cannot compile on its own
type
YamlParserEventKind* = enum
yamlStartDocument, yamlEndDocument, yamlStartMap, yamlEndMap,
yamlStartSequence, yamlEndSequence, yamlScalar, yamlAlias,
yamlError, yamlWarning
TagId* = distinct int
AnchorId* = distinct int
YamlParserEvent* = ref object
case kind*: YamlParserEventKind
of yamlStartMap, yamlStartSequence:
objAnchor* : AnchorId
objTag* : TagId
of yamlScalar:
scalarAnchor* : AnchorId
scalarTag* : TagId
scalarContent*: string # may not be nil (but empty)
of yamlEndMap, yamlEndSequence, yamlStartDocument, yamlEndDocument:
discard
of yamlAlias:
aliasTarget* : AnchorId
of yamlError, yamlWarning:
description* : string
line* : int
column* : int
YamlParserState = enum
ylInitial, ylSkipDirective, ylBlockLineStart, ylBlockAfterTag,
ylBlockAfterAnchor, ylBlockAfterAnchorAndTag, ylBlockAfterScalar,
ylBlockAfterAlias, ylBlockAfterColon, ylBlockMultilineScalar,
ylBlockLineEnd, ylBlockScalarHeader, ylBlockScalar, ylFlow,
ylFlowAfterObject, ylFlowAfterTag, ylFlowAfterAnchor,
ylFlowAfterAnchorAndTag, ylExpectingDocumentEnd, ylAfterDirectivesEnd
ypInitial, ypSkipDirective, ypBlockLineStart, ypBlockAfterTag,
ypBlockAfterAnchor, ypBlockAfterAnchorAndTag, ypBlockAfterScalar,
ypBlockAfterAlias, ypBlockAfterColon, ypBlockMultilineScalar,
ypBlockLineEnd, ypBlockScalarHeader, ypBlockScalar, ypFlow,
ypFlowAfterObject, ypFlowAfterTag, ypFlowAfterAnchor,
ypFlowAfterAnchorAndTag, ypExpectingDocumentEnd, ypAfterDirectivesEnd
DocumentLevelMode = enum
mBlockSequenceItem, mFlowSequenceItem, mExplicitBlockMapKey,
@ -52,10 +24,6 @@ type
BlockScalarStyle = enum
bsLiteral, bsFolded
YamlSequentialParser* = object
tags: OrderedTable[string, TagId]
anchors: OrderedTable[string, AnchorId]
const
tagExclamationMark*: TagId = 0.TagId # "!" non-specific tag
@ -72,7 +40,7 @@ proc `$`*(id: TagId): string {.borrow.}
proc `==`*(left, right: AnchorId): bool {.borrow.}
proc `$`*(id: AnchorId): string {.borrow.}
proc initParser*(): YamlSequentialParser
proc newParser*(): YamlSequentialParser
# iterators cannot be pre-declared.
#
@ -87,7 +55,8 @@ proc anchor*(parser: YamlSequentialParser, id: AnchorId): string
# implementation
proc initParser*(): YamlSequentialParser =
proc newParser*(): YamlSequentialParser =
new(result)
result.tags = initOrderedTable[string, TagId]()
result.tags["!"] = tagExclamationMark
result.tags["?"] = tagQuestionMark
@ -145,7 +114,7 @@ template yieldUnexpectedToken(expected: string = "") {.dirty.} =
msg.add(": " & $token)
yieldError(msg)
proc resolveAnchor(parser: var YamlSequentialParser, anchor: var string):
proc resolveAnchor(parser: YamlSequentialParser, anchor: var string):
AnchorId {.inline.} =
result = anchorNone
if anchor.len > 0:
@ -154,13 +123,13 @@ proc resolveAnchor(parser: var YamlSequentialParser, anchor: var string):
result = parser.anchors[anchor]
anchor = ""
proc resolveAlias(parser: var YamlSequentialParser, name: string): AnchorId =
proc resolveAlias(parser: YamlSequentialParser, name: string): AnchorId =
try:
result = parser.anchors[name]
except KeyError:
result = anchorNone
proc resolveTag(parser: var YamlSequentialParser, tag: var string,
proc resolveTag(parser: YamlSequentialParser, tag: var string,
quotedString: bool = false): TagId {.inline.} =
if tag.len == 0:
result = if quotedString: tagExclamationMark else: tagQuestionMark
@ -287,7 +256,7 @@ template handleBlockIndicator(expected, possible: openarray[DocumentLevelMode],
template startPlainScalar() {.dirty.} =
level.mode = mScalar
scalarCache = lex.content
state = ylBlockAfterScalar
state = ypBlockAfterScalar
template handleTagHandle() {.dirty.} =
let handle = lex.content
@ -296,7 +265,7 @@ template handleTagHandle() {.dirty.} =
if finished(nextToken):
yieldError("Missing tag suffix")
continue
if token != yamlTagSuffix:
if token != tTagSuffix:
yieldError("Missing tag suffix")
continue
tag = tagShorthands[handle] & lex.content
@ -305,12 +274,13 @@ template handleTagHandle() {.dirty.} =
else:
yieldError("Unknown tag shorthand: " & handle)
iterator events*(parser: var YamlSequentialParser,
input: Stream): YamlParserEvent {.closure.} =
proc parse*(parser: YamlSequentialParser,
s: Stream): iterator(): YamlParserEvent =
result = iterator(): YamlParserEvent =
var
# parsing state
lex: YamlLexer
state = ylInitial
state = ypInitial
# document state
foundYamlDirective = false
@ -335,7 +305,7 @@ iterator events*(parser: var YamlSequentialParser,
scalarCacheIsQuoted: bool = false
aliasCache = anchorNone
lex.open(input)
lex.open(s)
tagShorthands["!"] = "!"
tagShorthands["!!"] = "tag:yaml.org,2002:"
@ -344,9 +314,9 @@ iterator events*(parser: var YamlSequentialParser,
block parserLoop:
while not finished(nextToken):
case state
of ylInitial:
of ypInitial:
case token
of yamlYamlDirective:
of tYamlDirective:
if foundYamlDirective:
yieldError("Duplicate %YAML directive")
var
@ -356,7 +326,7 @@ iterator events*(parser: var YamlSequentialParser,
token = nextToken(lex)
if finished(nextToken):
yieldError("Missing or badly formatted YAML version")
if token != yamlVersionPart:
if token != tVersionPart:
yieldError("Missing or badly formatted YAML version")
if parseInt(lex.content) != version:
warn = true
@ -366,89 +336,89 @@ iterator events*(parser: var YamlSequentialParser,
yieldWarning("Unsupported version: " & actualVersion &
", trying to parse anyway")
foundYamlDirective = true
of yamlTagDirective:
of tTagDirective:
token = nextToken(lex)
if finished(nextToken):
yieldError("Incomplete %TAG directive")
if token != yamlTagHandle:
if token != tTagHandle:
yieldError("Invalid token (expected tag handle)")
let tagHandle = lex.content
token = nextToken(lex)
if finished(nextToken):
yieldError("Incomplete %TAG directive")
if token != yamlTagURI:
if token != tTagURI:
yieldError("Invalid token (expected tag URI)")
tagShorthands[tagHandle] = lex.content
of yamlUnknownDirective:
of tUnknownDirective:
yieldWarning("Unknown directive: " & lex.content)
state = ylSkipDirective
of yamlComment:
state = ypSkipDirective
of tComment:
discard
of yamlDirectivesEnd:
of tDirectivesEnd:
yield YamlParserEvent(kind: yamlStartDocument)
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1)
state = ylAfterDirectivesEnd
of yamlDocumentEnd, yamlStreamEnd:
state = ypAfterDirectivesEnd
of tDocumentEnd, tStreamEnd:
yield YamlParserEvent(kind: yamlStartDocument)
yieldDocumentEnd()
else:
yield YamlParserEvent(kind: yamlStartDocument)
state = ylBlockLineStart
state = ypBlockLineStart
continue
of ylSkipDirective:
if token notin [yamlUnknownDirectiveParam, yamlTagHandle,
yamlTagURI, yamlVersionPart, yamlComment]:
state = ylInitial
of ypSkipDirective:
if token notin [tUnknownDirectiveParam, tTagHandle,
tTagURI, tVersionPart, tComment]:
state = ypInitial
continue
of ylAfterDirectivesEnd:
of ypAfterDirectivesEnd:
case token
of yamlTagHandle:
of tTagHandle:
handleTagHandle()
state = ylBlockLineEnd
of yamlComment:
state = ylBlockLineEnd
of yamlLineStart:
state = ylBlockLineStart
state = ypBlockLineEnd
of tComment:
state = ypBlockLineEnd
of tLineStart:
state = ypBlockLineStart
else:
yieldUnexpectedToken()
of ylBlockLineStart:
of ypBlockLineStart:
case token
of yamlLineStart:
of tLineStart:
discard
of yamlDash:
of tDash:
handleBlockIndicator([mBlockSequenceItem], [],
mBlockSequenceItem, yamlStartSequence)
of yamlQuestionmark:
of tQuestionmark:
handleBlockIndicator([mImplicitBlockMapKey, mBlockMapValue],
[mExplicitBlockMapKey],
mExplicitBlockMapKey, yamlStartMap)
of yamlColon:
of tColon:
handleBlockIndicator([mExplicitBlockMapKey],
[mBlockMapValue, mImplicitBlockMapKey],
mBlockMapValue, yamlStartMap, true)
of yamlPipe, yamlGreater:
blockScalar = if token == yamlPipe: bsLiteral else: bsFolded
of tPipe, tGreater:
blockScalar = if token == tPipe: bsLiteral else: bsFolded
blockScalarIndentation = -1
lineStrip = lsClip
state = ylBlockScalarHeader
state = ypBlockScalarHeader
scalarCache = ""
level.mode = mScalar
of yamlTagHandle:
of tTagHandle:
leaveMoreIndentedLevels()
handleTagHandle()
level.indentationColumn = lex.column
state = ylBlockAfterTag
of yamlVerbatimTag:
state = ypBlockAfterTag
of tVerbatimTag:
tag = lex.content
state = ylBlockAfterTag
state = ypBlockAfterTag
level.indentationColumn = lex.column
of yamlAnchor:
of tAnchor:
leaveMoreIndentedLevels()
anchor = lex.content
level.indentationColumn = lex.column
state = ylBlockAfterAnchor
of yamlScalarPart:
state = ypBlockAfterAnchor
of tScalarPart:
leaveMoreIndentedLevels()
case level.mode
of mUnknown:
@ -469,18 +439,18 @@ iterator events*(parser: var YamlSequentialParser,
continue
else:
yieldError("Unexpected scalar in " & $level.mode)
state = ylBlockAfterScalar
of lexer.yamlScalar:
state = ypBlockAfterScalar
of tScalar:
leaveMoreIndentedLevels()
case level.mode
of mUnknown, mImplicitBlockMapKey:
scalarCache = lex.content
scalarCacheIsQuoted = true
scalarIndentation = lex.column
state = ylBlockAfterScalar
state = ypBlockAfterScalar
else:
yieldError("Unexpected scalar")
of lexer.yamlAlias:
of tAlias:
aliasCache = resolveAlias(parser, lex.content)
if aliasCache == anchorNone:
yieldError("[alias] Unknown anchor: " & lex.content)
@ -492,60 +462,60 @@ iterator events*(parser: var YamlSequentialParser,
leaveMoreIndentedLevels()
case level.mode
of mUnknown, mImplicitBlockMapKey, mBlockSequenceItem:
state = ylBlockAfterAlias
state = ypBlockAfterAlias
else:
yieldError("Unexpected alias")
of yamlStreamEnd:
of tStreamEnd:
closeAllLevels()
yield YamlParserEvent(kind: yamlEndDocument)
break
of yamlDocumentEnd:
of tDocumentEnd:
closeAllLevels()
yieldDocumentEnd()
state = ylInitial
of yamlOpeningBrace:
state = ylFlow
state = ypInitial
of tOpeningBrace:
state = ypFlow
continue
of yamlOpeningBracket:
state = ylFlow
of tOpeningBracket:
state = ypFlow
continue
else:
yieldUnexpectedToken()
of ylBlockMultilineScalar:
of ypBlockMultilineScalar:
case token
of yamlScalarPart:
of tScalarPart:
leaveMoreIndentedLevels()
if level.mode != mScalar:
state = ylBlockLineStart
state = ypBlockLineStart
continue
scalarCache &= " " & lex.content
state = ylBlockLineEnd
of yamlLineStart:
state = ypBlockLineEnd
of tLineStart:
discard
of yamlColon, yamlDash, yamlQuestionMark:
of tColon, tDash, tQuestionmark:
leaveMoreIndentedLevels()
if level.mode != mScalar:
state = ylBlockLineStart
state = ypBlockLineStart
continue
yieldUnexpectedToken()
of yamlDocumentEnd, yamlStreamEnd:
of tDocumentEnd, tStreamEnd:
closeAllLevels()
scalarCache = nil
state = ylInitial
state = ypInitial
continue
of yamlDirectivesEnd:
of tDirectivesEnd:
closeAllLevels()
state = ylAfterDirectivesEnd
state = ypAfterDirectivesEnd
continue
of lexer.yamlAlias:
of tAlias:
leaveMoreIndentedLevels()
state = ylBlockLineStart
state = ypBlockLineStart
continue
else:
yieldUnexpectedToken()
of ylBlockAfterScalar:
of ypBlockAfterScalar:
case token
of yamlColon:
of tColon:
assert level.mode in [mUnknown, mImplicitBlockMapKey, mScalar]
if level.mode in [mUnknown, mScalar]:
level.indentationColumn = scalarIndentation
@ -559,8 +529,8 @@ iterator events*(parser: var YamlSequentialParser,
indentationColumn: -1)
yieldScalar(scalarCache, scalarCacheIsQuoted)
scalarCache = nil
state = ylBlockAfterColon
of yamlLineStart:
state = ypBlockAfterColon
of tLineStart:
if level.mode == mImplicitBlockMapKey:
yieldError("Missing colon after implicit map key")
if level.mode != mScalar:
@ -569,10 +539,10 @@ iterator events*(parser: var YamlSequentialParser,
if ancestry.len > 0:
level = ancestry.pop()
else:
state = ylExpectingDocumentEnd
state = ypExpectingDocumentEnd
else:
state = ylBlockMultilineScalar
of yamlStreamEnd:
state = ypBlockMultilineScalar
of tStreamEnd:
yieldScalar(scalarCache, scalarCacheIsQuoted)
scalarCache = nil
if ancestry.len > 0:
@ -582,9 +552,9 @@ iterator events*(parser: var YamlSequentialParser,
break
else:
yieldUnexpectedToken()
of ylBlockAfterAlias:
of ypBlockAfterAlias:
case token
of yamlColon:
of tColon:
assert level.mode in [mUnknown, mImplicitBlockMapKey]
if level.mode == mUnknown:
yield YamlParserEvent(kind: yamlStartMap,
@ -595,122 +565,122 @@ iterator events*(parser: var YamlSequentialParser,
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1)
yield YamlParserEvent(kind: yamlAlias, aliasTarget: aliasCache)
state = ylBlockAfterColon
of yamlLineStart:
state = ypBlockAfterColon
of tLineStart:
if level.mode == mImplicitBlockMapKey:
yieldError("Missing colon after implicit map key")
if level.mode == mUnknown:
assert ancestry.len > 0
level = ancestry.pop()
yield YamlParserEvent(kind: yamlAlias, aliasTarget: aliasCache)
state = ylBlockLineStart
of yamlStreamEnd:
state = ypBlockLineStart
of tStreamEnd:
yield YamlParserEvent(kind: yamlAlias, aliasTarget: aliasCache)
if level.mode == mUnknown:
assert ancestry.len > 0
level = ancestry.pop()
state = ylBlockLineEnd
state = ypBlockLineEnd
continue
else:
yieldUnexpectedToken()
of ylBlockAfterTag:
of ypBlockAfterTag:
if mustLeaveLevel(lex.column, ancestry):
leaveMoreIndentedLevels()
state = ylBlockLineStart
state = ypBlockLineStart
continue
case token
of yamlAnchor:
of tAnchor:
anchor = lex.content
state = ylBlockAfterAnchorAndTag
of lexer.yamlScalar, yamlColon, yamlStreamEnd:
state = ylBlockLineStart
state = ypBlockAfterAnchorAndTag
of tScalar, tColon, tStreamEnd:
state = ypBlockLineStart
continue
of yamlScalarPart:
of tScalarPart:
startPlainScalar()
of yamlLineStart:
state = ylBlockLineStart
of yamlOpeningBracket, yamlOpeningBrace:
state = ylFlow
of tLineStart:
state = ypBlockLineStart
of tOpeningBracket, tOpeningBrace:
state = ypFlow
continue
else:
yieldUnexpectedToken()
of ylBlockAfterAnchor:
of ypBlockAfterAnchor:
if mustLeaveLevel(lex.column, ancestry):
leaveMoreIndentedLevels()
state = ylBlockLineStart
state = ypBlockLineStart
continue
case token
of lexer.yamlScalar, yamlColon, yamlStreamEnd:
state = ylBlockLineStart
of tScalar, tColon, tStreamEnd:
state = ypBlockLineStart
continue
of lexer.yamlScalarPart:
of tScalarPart:
startPlainScalar()
of yamlLineStart:
of tLineStart:
discard
of yamlOpeningBracket, yamlOpeningBrace:
state = ylFlow
of tOpeningBracket, tOpeningBrace:
state = ypFlow
continue
of yamlTagHandle:
of tTagHandle:
handleTagHandle()
state = ylBlockAfterAnchorAndTag
of yamlVerbatimTag:
state = ypBlockAfterAnchorAndTag
of tVerbatimTag:
tag = lex.content
state = ylBlockAfterAnchorAndTag
state = ypBlockAfterAnchorAndTag
level.indentationColumn = lex.column
else:
yieldUnexpectedToken()
of ylBlockAfterAnchorAndTag:
of ypBlockAfterAnchorAndTag:
if mustLeaveLevel(lex.column, ancestry):
leaveMoreIndentedLevels()
state = ylBlockLineStart
state = ypBlockLineStart
continue
case token
of lexer.yamlScalar, yamlColon, yamlStreamEnd:
state = ylBlockLineStart
of tScalar, tColon, tStreamEnd:
state = ypBlockLineStart
continue
of yamlScalarPart:
of tScalarPart:
startPlainScalar()
of yamlLineStart:
of tLineStart:
discard
of yamlOpeningBracket, yamlOpeningBrace:
state = ylFlow
of tOpeningBracket, tOpeningBrace:
state = ypFlow
continue
else:
yieldUnexpectedToken()
of ylBlockAfterColon:
of ypBlockAfterColon:
case token
of lexer.yamlScalar:
of tScalar:
yieldScalar(lex.content, true)
level = ancestry.pop()
assert level.mode == mBlockMapValue
level.mode = mImplicitBlockMapKey
state = ylBlockLineEnd
of yamlScalarPart:
state = ypBlockLineEnd
of tScalarPart:
startPlainScalar()
of yamlLineStart:
state = ylBlockLineStart
of yamlStreamEnd:
of tLineStart:
state = ypBlockLineStart
of tStreamEnd:
closeAllLevels()
yield YamlParserEvent(kind: yamlEndDocument)
break
of yamlOpeningBracket, yamlOpeningBrace:
state = ylFlow
of tOpeningBracket, tOpeningBrace:
state = ypFlow
continue
of yamlPipe, yamlGreater:
blockScalar = if token == yamlPipe: bsLiteral else: bsFolded
of tPipe, tGreater:
blockScalar = if token == tPipe: bsLiteral else: bsFolded
blockScalarIndentation = -1
lineStrip = lsClip
state = ylBlockScalarHeader
state = ypBlockScalarHeader
scalarCache = ""
level.mode = mScalar
of yamlTagHandle:
of tTagHandle:
handleTagHandle()
state = ylBlockAfterTag
of yamlAnchor:
state = ypBlockAfterTag
of tAnchor:
level.indentationColumn = lex.column
anchor = lex.content
state = ylBlockAfterAnchor
of lexer.yamlAlias:
state = ypBlockAfterAnchor
of tAlias:
var noAnchor = false
try:
aliasCache = parser.anchors[lex.content]
@ -721,45 +691,45 @@ iterator events*(parser: var YamlSequentialParser,
yieldError("[alias] Unknown anchor: " & lex.content)
yield YamlParserEvent(kind: yamlAlias, aliasTarget: aliasCache)
level = ancestry.pop()
state = ylBlockLineEnd
state = ypBlockLineEnd
else:
yieldUnexpectedToken("scalar or line end")
of ylBlockLineEnd:
of ypBlockLineEnd:
case token
of yamlLineStart:
state = if level.mode == mScalar: ylBlockMultilineScalar else:
ylBlockLineStart
of yamlStreamEnd:
of tLineStart:
state = if level.mode == mScalar: ypBlockMultilineScalar else:
ypBlockLineStart
of tStreamEnd:
closeAllLevels()
yield YamlParserEvent(kind: yamlEndDocument)
break
else:
yieldUnexpectedToken("line end")
of ylBlockScalarHeader:
of ypBlockScalarHeader:
case token
of yamlPlus:
of tPlus:
if lineStrip != lsClip:
yieldError("Multiple chomping indicators!")
else:
lineStrip = lsKeep
of yamlDash:
of tDash:
if lineStrip != lsClip:
yieldError("Multiple chomping indicators!")
else:
lineStrip = lsStrip
of yamlBlockIndentationIndicator:
of tBlockIndentationIndicator:
if blockScalarIndentation != -1:
yieldError("Multiple indentation indicators!")
else:
blockScalarIndentation = parseInt(lex.content)
of yamlLineStart:
of tLineStart:
blockScalarTrailing = ""
state = ylBlockScalar
state = ypBlockScalar
else:
yieldUnexpectedToken()
of ylBlockScalar:
of ypBlockScalar:
case token
of yamlLineStart:
of tLineStart:
if level.indentationColumn == -1:
discard
else:
@ -783,14 +753,14 @@ iterator events*(parser: var YamlSequentialParser,
lex.content[level.indentationColumn..^1]
blockScalarTrailing = ""
of yamlScalarPart:
of tScalarPart:
if ancestry.high > 0:
if ancestry[ancestry.high].indicatorColumn >= lex.column or
ancestry[ancestry.high].indicatorColumn == -1 and
ancestry[ancestry.high].indentationColumn >= lex.column:
# todo: trailing chomping?
closeLevel(level)
state = ylBlockLineStart
state = ypBlockLineStart
continue
if level.indentationColumn == -1:
level.indentationColumn = lex.column
@ -808,20 +778,20 @@ iterator events*(parser: var YamlSequentialParser,
scalarCache &= blockScalarTrailing
closeLevel(level)
if ancestry.len == 0:
state = ylExpectingDocumentEnd
state = ypExpectingDocumentEnd
else:
level = ancestry.pop()
state = ylBlockLineStart
state = ypBlockLineStart
continue
of ylFlow:
of ypFlow:
case token
of yamlLineStart:
of tLineStart:
discard
of lexer.yamlScalar, yamlScalarPart:
yieldScalar(lex.content, token == lexer.yamlScalar)
of tScalar, tScalarPart:
yieldScalar(lex.content, token == tScalar)
level = ancestry.pop()
state = ylFlowAfterObject
of yamlColon:
state = ypFlowAfterObject
of tColon:
yieldScalar()
level = ancestry.pop()
if level.mode == mFlowMapKey:
@ -831,7 +801,7 @@ iterator events*(parser: var YamlSequentialParser,
indentationColumn: -1)
else:
yieldUnexpectedToken("scalar, comma or map end")
of yamlComma:
of tComma:
yieldScalar()
level = ancestry.pop()
case level.mode
@ -844,7 +814,7 @@ iterator events*(parser: var YamlSequentialParser,
yieldScalar()
else:
yieldError("Internal error! Please report this bug.")
of yamlOpeningBrace:
of tOpeningBrace:
if level.mode != mUnknown:
yieldUnexpectedToken()
level.mode = mFlowMapKey
@ -852,7 +822,7 @@ iterator events*(parser: var YamlSequentialParser,
ancestry.add(level)
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1)
of yamlOpeningBracket:
of tOpeningBracket:
if level.mode != mUnknown:
yieldUnexpectedToken()
level.mode = mFlowSequenceItem
@ -860,7 +830,7 @@ iterator events*(parser: var YamlSequentialParser,
ancestry.add(level)
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1)
of yamlClosingBrace:
of tClosingBrace:
if level.mode == mUnknown:
yieldScalar()
level = ancestry.pop()
@ -871,12 +841,12 @@ iterator events*(parser: var YamlSequentialParser,
level = ancestry.pop()
case level.mode
of mFlowMapKey, mFlowMapValue, mFlowSequenceItem:
state = ylFlowAfterObject
state = ypFlowAfterObject
else:
state = ylBlockLineEnd
state = ypBlockLineEnd
else:
state = ylExpectingDocumentEnd
of yamlClosingBracket:
state = ypExpectingDocumentEnd
of tClosingBracket:
if level.mode == mUnknown:
yieldScalar()
level = ancestry.pop()
@ -888,58 +858,58 @@ iterator events*(parser: var YamlSequentialParser,
level = ancestry.pop()
case level.mode
of mFlowMapKey, mFlowMapValue, mFlowSequenceItem:
state = ylFlowAfterObject
state = ypFlowAfterObject
else:
state = ylBlockLineEnd
state = ypBlockLineEnd
else:
state = ylExpectingDocumentEnd
of yamlTagHandle:
state = ypExpectingDocumentEnd
of tTagHandle:
handleTagHandle()
state = ylFlowAfterTag
of yamlAnchor:
state = ypFlowAfterTag
of tAnchor:
anchor = lex.content
state = ylFlowAfterAnchor
of lexer.yamlAlias:
state = ypFlowAfterAnchor
of tAlias:
yield YamlParserEvent(kind: yamlAlias,
aliasTarget: resolveAlias(parser, lex.content))
state = ylFlowAfterObject
state = ypFlowAfterObject
level = ancestry.pop()
else:
yieldUnexpectedToken()
of ylFlowAfterTag:
of ypFlowAfterTag:
case token
of yamlTagHandle:
of tTagHandle:
yieldError("Multiple tags on same node!")
of yamlAnchor:
of tAnchor:
anchor = lex.content
state = ylFlowAfterAnchorAndTag
state = ypFlowAfterAnchorAndTag
else:
state = ylFlow
state = ypFlow
continue
of ylFlowAfterAnchor:
of ypFlowAfterAnchor:
case token
of yamlAnchor:
of tAnchor:
yieldError("Multiple anchors on same node!")
of yamlTagHandle:
of tTagHandle:
handleTagHandle()
state = ylFlowAfterAnchorAndTag
state = ypFlowAfterAnchorAndTag
else:
state = ylFlow
state = ypFlow
continue
of ylFlowAfterAnchorAndTag:
of ypFlowAfterAnchorAndTag:
case token
of yamlAnchor:
of tAnchor:
yieldError("Multiple anchors on same node!")
of yamlTagHandle:
of tTagHandle:
yieldError("Multiple tags on same node!")
else:
state = ylFlow
state = ypFlow
continue
of ylFlowAfterObject:
of ypFlowAfterObject:
case token
of yamlLineStart:
of tLineStart:
discard
of yamlColon:
of tColon:
if level.mode != mFlowMapKey:
yieldUnexpectedToken()
else:
@ -947,24 +917,24 @@ iterator events*(parser: var YamlSequentialParser,
ancestry.add(level)
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1)
state = ylFlow
of yamlComma:
state = ypFlow
of tComma:
case level.mode
of mFlowSequenceItem:
ancestry.add(level)
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1)
state = ylFlow
state = ypFlow
of mFlowMapValue:
level.mode = mFlowMapKey
ancestry.add(level)
level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
indentationColumn: -1)
state = ylFlow
state = ypFlow
else:
echo "level.mode = ", level.mode
yieldUnexpectedToken()
of yamlClosingBrace:
of tClosingBrace:
if level.mode != mFlowMapValue:
yieldUnexpectedToken()
else:
@ -973,12 +943,12 @@ iterator events*(parser: var YamlSequentialParser,
level = ancestry.pop()
case level.mode
of mFlowMapKey, mFlowMapValue, mFlowSequenceItem:
state = ylFlowAfterObject
state = ypFlowAfterObject
else:
state = ylBlockLineEnd
state = ypBlockLineEnd
else:
state = ylExpectingDocumentEnd
of yamlClosingBracket:
state = ypExpectingDocumentEnd
of tClosingBracket:
if level.mode != mFlowSequenceItem:
yieldUnexpectedToken()
else:
@ -987,23 +957,23 @@ iterator events*(parser: var YamlSequentialParser,
level = ancestry.pop()
case level.mode
of mFlowMapKey, mFlowMapValue, mFlowSequenceItem:
state = ylFlowAfterObject
state = ypFlowAfterObject
else:
state = ylBlockLineEnd
state = ypBlockLineEnd
else:
state = ylExpectingDocumentEnd
state = ypExpectingDocumentEnd
else:
yieldUnexpectedToken()
of ylExpectingDocumentEnd:
of ypExpectingDocumentEnd:
case token
of yamlComment, yamlLineStart:
of tComment, tLineStart:
discard
of yamlStreamEnd, yamlDocumentEnd:
of tStreamEnd, tDocumentEnd:
yieldDocumentEnd()
state = ylInitial
of yamlDirectivesEnd:
state = ypInitial
of tDirectivesEnd:
yieldDocumentEnd()
state = ylAfterDirectivesEnd
state = ypAfterDirectivesEnd
continue
else:
yieldUnexpectedToken("document end")

45
src/yaml.nim Normal file
View File

@ -0,0 +1,45 @@
import streams, unicode, lexbase, tables, strutils
type
  # Type guess attached to a scalar. The lexer tests in this commit expect
  # `false` -> yTypeBoolean, `null` -> yTypeNull, `-13` -> yTypeInteger,
  # `42.25` / `-4e+3` -> yTypeFloat and `string` -> yTypeString.
  YamlTypeHint* = enum
    yTypeInteger, yTypeFloat, yTypeBoolean, yTypeNull, yTypeString,
    yTypeUnknown

  # Discriminator of `YamlParserEvent`; selects which fields an event has.
  YamlParserEventKind* = enum
    yamlStartDocument, yamlEndDocument, yamlStartMap, yamlEndMap,
    yamlStartSequence, yamlEndSequence, yamlScalar, yamlAlias,
    yamlError, yamlWarning

  # Distinct integer ids: not implicitly convertible to or from `int`, nor
  # to each other. `YamlSequentialParser` maps strings to these ids.
  TagId* = distinct int
  AnchorId* = distinct int

  # One parser event. Only the fields of the branch selected by `kind` may
  # be accessed.
  YamlParserEvent* = ref object
    case kind*: YamlParserEventKind
    of yamlStartMap, yamlStartSequence:
      objAnchor*: AnchorId      ## anchor id of the map / sequence
      objTag*: TagId            ## tag id of the map / sequence
    of yamlScalar:
      scalarAnchor*: AnchorId   ## anchor id of the scalar
      scalarTag*: TagId         ## tag id of the scalar
      scalarContent*: string    ## may not be nil (but empty)
    of yamlEndMap, yamlEndSequence, yamlStartDocument, yamlEndDocument:
      discard                   # these events carry no payload
    of yamlAlias:
      aliasTarget*: AnchorId    ## id of the anchor the alias refers to
    of yamlError, yamlWarning:
      description*: string      ## message text
      # NOTE(review): presumably the input position of the problem --
      # confirm against the code that builds these events.
      line*: int
      column*: int

  # Parser state. Both tables are private to this module (and to the files
  # it includes) and map a name to its numeric id.
  YamlSequentialParser* = ref object
    tags: OrderedTable[string, TagId]
    anchors: OrderedTable[string, AnchorId]
# interface
# Forward declaration only: no body is given at this point in the file.
# Takes a parser and an input stream `s`, and returns an iterator value
# that yields `YamlParserEvent`s when invoked.
# NOTE(review): the body presumably comes from one of the files included
# below (private.sequential?) -- confirm.
proc parse*(parser: YamlSequentialParser, s: Stream): iterator(): YamlParserEvent
# implementation
include private.lexer
include private.sequential

View File

@ -1,10 +1,15 @@
import "../src/yaml/private/lexer"
import streams, unicode
import streams, unicode, lexbase
import unittest
type
YamlTypeHint* = enum
yTypeInteger, yTypeFloat, yTypeBoolean, yTypeNull, yTypeString,
yTypeUnknown
include "../src/private/lexer"
type BasicLexerToken = tuple[kind: YamlLexerToken, content: string,
typeHint: YamlLexerTypeHint]
typeHint: YamlTypeHint]
template ensure(input: string, expected: openarray[BasicLexerToken]) =
var
@ -18,7 +23,7 @@ template ensure(input: string, expected: openarray[BasicLexerToken]) =
fail()
break
if token != expected[i].kind:
if token == yamlError:
if token == tError:
echo "got lexer error: " & lex.content
else:
echo "wrong token kind (expected ", expected[i], ", got ",
@ -31,7 +36,7 @@ template ensure(input: string, expected: openarray[BasicLexerToken]) =
expected[i].content, "\", got \"", lex.content, "\")"
fail()
break
if token == yamlScalarPart:
if token == tScalarPart:
if lex.typeHint != expected[i].typeHint:
echo "wrong type hint (expected ", expected[i].typeHint,
", got ", lex.typeHint, ")"
@ -43,131 +48,131 @@ template ensure(input: string, expected: openarray[BasicLexerToken]) =
expected[i].kind, ")"
proc t(kind: YamlLexerToken, content: string,
typeHint: YamlLexerTypeHint = yTypeString): BasicLexerToken =
typeHint: YamlTypeHint = yTypeString): BasicLexerToken =
(kind: kind, content: content, typeHint: typeHint)
suite "Lexing":
test "Lexing: YAML Directive":
ensure("%YAML 1.2", [t(yamlYamlDirective, nil),
t(yamlVersionPart, "1"),
t(yamlVersionPart, "2"),
t(yamlStreamEnd, nil)])
ensure("%YAML 1.2", [t(tYamlDirective, nil),
t(tVersionPart, "1"),
t(tVersionPart, "2"),
t(tStreamEnd, nil)])
test "Lexing: TAG Directive":
ensure("%TAG !t! tag:http://example.com/",
[t(yamlTagDirective, nil),
t(yamlTagHandle, "!t!"),
t(yamlTagURI, "tag:http://example.com/"),
t(yamlStreamEnd, nil)])
[t(tTagDirective, nil),
t(tTagHandle, "!t!"),
t(tTagURI, "tag:http://example.com/"),
t(tStreamEnd, nil)])
test "Lexing: Unknown Directive":
ensure("%FOO bar baz", [t(yamlUnknownDirective, "%FOO"),
t(yamlUnknownDirectiveParam, "bar"),
t(yamlUnknownDirectiveParam, "baz"),
t(yamlStreamEnd, nil)])
ensure("%FOO bar baz", [t(tUnknownDirective, "%FOO"),
t(tUnknownDirectiveParam, "bar"),
t(tUnknownDirectiveParam, "baz"),
t(tStreamEnd, nil)])
test "Lexing: Comments after Directives":
ensure("%YAML 1.2 # version\n# at line start\n # indented\n%FOO",
[t(yamlYamlDirective, nil),
t(yamlVersionPart, "1"),
t(yamlVersionPart, "2"),
t(yamlComment, " version"),
t(yamlComment, " at line start"),
t(yamlComment, " indented"),
t(yamlUnknownDirective, "%FOO"),
t(yamlStreamEnd, nil)])
[t(tYamlDirective, nil),
t(tVersionPart, "1"),
t(tVersionPart, "2"),
t(tComment, " version"),
t(tComment, " at line start"),
t(tComment, " indented"),
t(tUnknownDirective, "%FOO"),
t(tStreamEnd, nil)])
test "Lexing: Directives End":
ensure("---", [t(yamlDirectivesEnd, nil),
t(yamlStreamEnd, nil)])
ensure("---", [t(tDirectivesEnd, nil),
t(tStreamEnd, nil)])
test "Lexing: Document End":
ensure("...", [t(yamlLineStart, nil),
t(yamlDocumentEnd, nil),
t(yamlStreamEnd, nil)])
ensure("...", [t(tLineStart, nil),
t(tDocumentEnd, nil),
t(tStreamEnd, nil)])
test "Lexing: Directive after Document End":
ensure("content\n...\n%YAML 1.2",
[t(yamlLineStart, ""),
t(yamlScalarPart, "content"),
t(yamlLineStart, ""),
t(yamlDocumentEnd, nil),
t(yamlYamlDirective, nil),
t(yamlVersionPart, "1"),
t(yamlVersionPart, "2"),
t(yamlStreamEnd, nil)])
[t(tLineStart, ""),
t(tScalarPart, "content"),
t(tLineStart, ""),
t(tDocumentEnd, nil),
t(tYamlDirective, nil),
t(tVersionPart, "1"),
t(tVersionPart, "2"),
t(tStreamEnd, nil)])
test "Lexing: Plain Scalar (alphanumeric)":
ensure("abA03rel4", [t(yamlLineStart, ""),
t(yamlScalarPart, "abA03rel4"),
t(yamlStreamEnd, nil)])
ensure("abA03rel4", [t(tLineStart, ""),
t(tScalarPart, "abA03rel4"),
t(tStreamEnd, nil)])
test "Lexing: Plain Scalar (with spaces)":
ensure("test content", [t(yamlLineStart, ""),
t(yamlScalarPart, "test content"),
t(yamlStreamEnd, nil)])
ensure("test content", [t(tLineStart, ""),
t(tScalarPart, "test content"),
t(tStreamEnd, nil)])
test "Lexing: Plain Scalar (with special chars)":
ensure(":test ?content -with #special !chars",
[t(yamlLineStart, nil),
t(yamlScalarPart, ":test ?content -with #special !chars"),
t(yamlStreamEnd, nil)])
[t(tLineStart, nil),
t(tScalarPart, ":test ?content -with #special !chars"),
t(tStreamEnd, nil)])
test "Lexing: Plain Scalar (starting with %)":
ensure("---\n%test", [t(yamlDirectivesEnd, nil),
t(yamlLineStart, ""),
t(yamlScalarPart, "%test"),
t(yamlStreamEnd, nil)])
ensure("---\n%test", [t(tDirectivesEnd, nil),
t(tLineStart, ""),
t(tScalarPart, "%test"),
t(tStreamEnd, nil)])
test "Lexing: Single Quoted Scalar":
ensure("'? test - content! '", [t(yamlLineStart, ""),
t(yamlScalar, "? test - content! "),
t(yamlStreamEnd, nil)])
ensure("'? test - content! '", [t(tLineStart, ""),
t(tScalar, "? test - content! "),
t(tStreamEnd, nil)])
test "Lexing: Single Quoted Scalar (escaped single quote inside)":
ensure("'test '' content'", [t(yamlLineStart, ""),
t(yamlScalar, "test ' content"),
t(yamlStreamEnd, nil)])
ensure("'test '' content'", [t(tLineStart, ""),
t(tScalar, "test ' content"),
t(tStreamEnd, nil)])
test "Lexing: Doubly Quoted Scalar":
ensure("\"test content\"", [t(yamlLineStart, ""),
t(yamlScalar, "test content"),
t(yamlStreamEnd, nil)])
ensure("\"test content\"", [t(tLineStart, ""),
t(tScalar, "test content"),
t(tStreamEnd, nil)])
test "Lexing: Doubly Quoted Scalar (escaping)":
ensure(""""\t\\\0\""""", [t(yamlLineStart, ""),
t(yamlScalar, "\t\\\0\""),
t(yamlStreamEnd, nil)])
ensure(""""\t\\\0\""""", [t(tLineStart, ""),
t(tScalar, "\t\\\0\""),
t(tStreamEnd, nil)])
test "Lexing: Doubly Quoted Scalar (unicode escaping)":
ensure(""""\x42\u4243\U00424344"""",
[t(yamlLineStart, ""),
t(yamlScalar, "\x42" & toUTF8(cast[Rune](0x4243)) &
[t(tLineStart, ""),
t(tScalar, "\x42" & toUTF8(cast[Rune](0x4243)) &
toUTF8(cast[Rune](0x424344))),
t(yamlStreamEnd, nil)])
t(tStreamEnd, nil)])
test "Lexing: Block Array":
ensure("""
- a
- b""", [t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalarPart, "a"),
t(yamlLineStart, ""), t(yamlDash, nil), t(yamlScalarPart, "b"),
t(yamlStreamEnd, nil)])
- b""", [t(tLineStart, ""), t(tDash, nil), t(tScalarPart, "a"),
t(tLineStart, ""), t(tDash, nil), t(tScalarPart, "b"),
t(tStreamEnd, nil)])
test "Lexing: Block Map with Implicit Keys":
ensure("""
foo: bar
herp: derp""", [t(yamlLineStart, ""), t(yamlScalarPart, "foo"),
t(yamlColon, nil), t(yamlScalarPart, "bar"),
t(yamlLineStart, ""), t(yamlScalarPart, "herp"),
t(yamlColon, nil), t(yamlScalarPart, "derp"),
t(yamlStreamEnd, nil)])
herp: derp""", [t(tLineStart, ""), t(tScalarPart, "foo"),
t(tColon, nil), t(tScalarPart, "bar"),
t(tLineStart, ""), t(tScalarPart, "herp"),
t(tColon, nil), t(tScalarPart, "derp"),
t(tStreamEnd, nil)])
test "Lexing: Block Map with Explicit Keys":
ensure("""
? foo
: bar""", [t(yamlLineStart, ""), t(yamlQuestionmark, nil),
t(yamlScalarPart, "foo"), t(yamlLineStart, ""), t(yamlColon, nil),
t(yamlScalarPart, "bar"), t(yamlStreamEnd, nil)])
: bar""", [t(tLineStart, ""), t(tQuestionmark, nil),
t(tScalarPart, "foo"), t(tLineStart, ""), t(tColon, nil),
t(tScalarPart, "bar"), t(tStreamEnd, nil)])
test "Lexing: Indentation":
ensure("""
@ -176,41 +181,41 @@ foo:
- baz
- biz
herp: derp""",
[t(yamlLineStart, ""), t(yamlScalarPart, "foo"), t(yamlColon, nil),
t(yamlLineStart, " "), t(yamlScalarPart, "bar"), t(yamlColon, nil),
t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalarPart, "baz"),
t(yamlLineStart, " "), t(yamlDash, nil), t(yamlScalarPart, "biz"),
t(yamlLineStart, " "), t(yamlScalarPart, "herp"), t(yamlColon, nil),
t(yamlScalarPart, "derp"), t(yamlStreamEnd, nil)])
[t(tLineStart, ""), t(tScalarPart, "foo"), t(tColon, nil),
t(tLineStart, " "), t(tScalarPart, "bar"), t(tColon, nil),
t(tLineStart, " "), t(tDash, nil), t(tScalarPart, "baz"),
t(tLineStart, " "), t(tDash, nil), t(tScalarPart, "biz"),
t(tLineStart, " "), t(tScalarPart, "herp"), t(tColon, nil),
t(tScalarPart, "derp"), t(tStreamEnd, nil)])
test "Lexing: Anchor":
ensure("foo: &bar", [t(yamlLineStart, ""), t(yamlScalarPart, "foo"),
t(yamlColon, nil), t(yamlAnchor, "bar"),
t(yamlStreamEnd, nil)])
ensure("foo: &bar", [t(tLineStart, ""), t(tScalarPart, "foo"),
t(tColon, nil), t(tAnchor, "bar"),
t(tStreamEnd, nil)])
test "Lexing: Alias":
ensure("foo: *bar", [t(yamlLineStart, ""), t(yamlScalarPart, "foo"),
t(yamlColon, nil), t(yamlAlias, "bar"),
t(yamlStreamEnd, nil)])
ensure("foo: *bar", [t(tLineStart, ""), t(tScalarPart, "foo"),
t(tColon, nil), t(tAlias, "bar"),
t(tStreamEnd, nil)])
test "Lexing: Tag handle":
ensure("!t!str tagged", [t(yamlLineStart, ""), t(yamlTagHandle, "!t!"),
t(yamlTagSuffix, "str"),
t(yamlScalarPart, "tagged"),
t(yamlStreamEnd, nil)])
ensure("!t!str tagged", [t(tLineStart, ""), t(tTagHandle, "!t!"),
t(tTagSuffix, "str"),
t(tScalarPart, "tagged"),
t(tStreamEnd, nil)])
test "Lexing: Verbatim tag handle":
ensure("!<tag:http://example.com/str> tagged",
[t(yamlLineStart, ""),
t(yamlVerbatimTag, "tag:http://example.com/str"),
t(yamlScalarPart, "tagged"), t(yamlStreamEnd, nil)])
[t(tLineStart, ""),
t(tVerbatimTag, "tag:http://example.com/str"),
t(tScalarPart, "tagged"), t(tStreamEnd, nil)])
test "Lexing: Type hints":
ensure("false\nnull\nstring\n-13\n42.25\n-4e+3\n5.42e78",
[t(yamlLineStart, ""), t(yamlScalarPart, "false", yTypeBoolean),
t(yamlLineStart, ""), t(yamlScalarPart, "null", yTypeNull),
t(yamlLineStart, ""), t(yamlScalarPart, "string", yTypeString),
t(yamlLineStart, ""), t(yamlScalarPart, "-13", yTypeInteger),
t(yamlLineStart, ""), t(yamlScalarPart, "42.25", yTypeFloat),
t(yamlLineStart, ""), t(yamlScalarPart, "-4e+3", yTypeFloat),
t(yamlLineStart, ""), t(yamlScalarPart, "5.42e78", yTypeFloat),
t(yamlStreamEnd, nil)])
[t(tLineStart, ""), t(tScalarPart, "false", yTypeBoolean),
t(tLineStart, ""), t(tScalarPart, "null", yTypeNull),
t(tLineStart, ""), t(tScalarPart, "string", yTypeString),
t(tLineStart, ""), t(tScalarPart, "-13", yTypeInteger),
t(tLineStart, ""), t(tScalarPart, "42.25", yTypeFloat),
t(tLineStart, ""), t(tScalarPart, "-4e+3", yTypeFloat),
t(tLineStart, ""), t(tScalarPart, "5.42e78", yTypeFloat),
t(tStreamEnd, nil)])

View File

@ -1,7 +1,7 @@
import "../src/yaml/sequential"
import "../src/yaml"
import streams
import unittest
import streams
proc startDoc(): YamlParserEvent =
new(result)
@ -97,9 +97,11 @@ proc printDifference(expected, actual: YamlParserEvent) =
echo "Unknown difference in event kind " & $expected.kind
template ensure(input: string, expected: varargs[YamlParserEvent]) {.dirty.} =
var i = 0
var
i = 0
events = parser.parse(newStringStream(input))
for token in parser.events(newStringStream(input)):
for token in events():
if i >= expected.len:
echo "received more tokens than expected (next token = ",
token.kind, ")"
@ -114,7 +116,7 @@ template ensure(input: string, expected: varargs[YamlParserEvent]) {.dirty.} =
suite "Parsing":
setup:
var parser = initParser()
var parser = newParser()
test "Parsing: Simple Scalar":
ensure("Scalar", startDoc(), scalar("Scalar"), endDoc())