translated parser from AdaYaml

This commit is contained in:
Felix Krause 2020-11-03 21:17:31 +01:00
parent 427084f44a
commit 05b8528f3e
6 changed files with 1406 additions and 1594 deletions

@ -1 +1 @@
Subproject commit e11b84891a3cd7aa4460f9d09c99283c974c02c6 Subproject commit fa266d172ef7b2cdd35219b8e677fa5e0ae86269

306
yaml/data.nim Normal file
View File

@ -0,0 +1,306 @@
import hashes
import private/internal
type
  Anchor* = distinct string ## \
    ## An ``Anchor`` identifies an anchor in the current document.
    ## It is not necessarily unique and references to an anchor must be
    ## resolved immediately on occurrence.
    ##
    ## Anchor provides the operator `$` for converting to string, `==` for
    ## comparison, and `hash` for usage in a hashmap.
  TagId* = distinct int ## \
    ## A ``TagId`` identifies a tag URI, like for example
    ## ``"tag:yaml.org,2002:str"``. The URI corresponding to a ``TagId`` can
    ## be queried from the `TagLibrary <#TagLibrary>`_ which was
    ## used to create this ``TagId``; e.g. when you parse a YAML character
    ## stream, the ``TagLibrary`` of the parser is the one which generates
    ## the resulting ``TagId`` s.
    ##
    ## URI strings are mapped to ``TagId`` s for efficiency reasons (you
    ## do not need to compare strings every time) and to be able to
    ## discover unknown tag URIs early in the parsing process.
  ScalarStyle* = enum
    ## Original style of the scalar (for input),
    ## or desired style of the scalar (for output).
    ssAny, ssPlain, ssSingleQuoted, ssDoubleQuoted, ssLiteral, ssFolded
  CollectionStyle* = enum
    ## Presentation style of a collection; stored in the ``mapStyle`` and
    ## ``seqStyle`` fields of `Event <#Event>`_.
    csBlock, csFlow
  EventKind* = enum
    ## Kinds of YAML events that may occur in an ``YamlStream``. Event kinds
    ## are discussed in `Event <#Event>`_.
    yamlStartStream, yamlEndStream,
    yamlStartDoc, yamlEndDoc, yamlStartMap, yamlEndMap,
    yamlStartSeq, yamlEndSeq, yamlScalar, yamlAlias
  Event* = object
    ## An element from a `YamlStream <#YamlStream>`_. Events that start an
    ## object (``yamlStartMap``, ``yamlStartSeq``, ``yamlScalar``) have
    ## an optional anchor and a tag associated with them. The anchor will be
    ## set to ``yAnchorNone`` if it doesn't exist.
    ##
    ## A missing tag in the YAML character stream generates
    ## the non-specific tags ``?`` or ``!`` according to the YAML
    ## specification. These are by convention mapped to the ``TagId`` s
    ## ``yTagQuestionMark`` and ``yTagExclamationMark`` respectively.
    ## Mapping is done by a `TagLibrary <#TagLibrary>`_.
    ##
    ## ``startPos`` and ``endPos`` are only relevant for events from an input
    ## stream - they are generally ignored if used with events that generate
    ## output.
    startPos*, endPos*: Mark
    case kind*: EventKind
    of yamlStartStream, yamlEndStream: discard
    of yamlStartMap:
      # anchor/tag and presentation style of the mapping being started
      mapProperties*: Properties
      mapStyle*: CollectionStyle
    of yamlStartSeq:
      # anchor/tag and presentation style of the sequence being started
      seqProperties*: Properties
      seqStyle*: CollectionStyle
    of yamlScalar:
      # anchor/tag, presentation style and content of the scalar
      scalarProperties*: Properties
      scalarStyle* : ScalarStyle
      scalarContent*: string
    of yamlStartDoc:
      # whether the document was started with an explicit `---`
      explicitDirectivesEnd*: bool
      # NOTE(review): presumably the version given by a %YAML directive;
      # nothing in this module sets it - confirm against the parser.
      version*: string
    of yamlEndDoc:
      # whether the document was ended with an explicit `...`
      explicitDocumentEnd*: bool
    of yamlEndMap, yamlEndSeq: discard
    of yamlAlias:
      # the anchor this alias refers to
      aliasTarget* : Anchor
  Mark* = tuple[line, column: Positive] ## \
    ## A position given as line and column. Both components are
    ## ``Positive``, i.e. counting starts at 1.
  Properties* = tuple[anchor: Anchor, tag: TagId] ## \
    ## The anchor and tag attached to a node-starting event
    ## (``yamlStartMap``, ``yamlStartSeq``, ``yamlScalar``).
const
  yAnchorNone*: Anchor = "".Anchor ## \
    ## yielded when no anchor was defined for a YAML node
  defaultMark: Mark = (1.Positive, 1.Positive) ## \
    ## used for events that are not generated from input.
    ## Serves as default for the ``startPos`` / ``endPos`` parameters of
    ## the event constructor procs in this module.
  yTagExclamationMark*: TagId = 0.TagId ## ``!`` non-specific tag
  yTagQuestionMark* : TagId = 1.TagId ## ``?`` non-specific tag
  # failsafe schema
  yTagString* : TagId = 2.TagId ## \
    ## `!!str <http://yaml.org/type/str.html>`_ tag
  yTagSequence* : TagId = 3.TagId ## \
    ## `!!seq <http://yaml.org/type/seq.html>`_ tag
  yTagMapping* : TagId = 4.TagId ## \
    ## `!!map <http://yaml.org/type/map.html>`_ tag
  # json & core schema
  yTagNull* : TagId = 5.TagId ## \
    ## `!!null <http://yaml.org/type/null.html>`_ tag
  yTagBoolean* : TagId = 6.TagId ## \
    ## `!!bool <http://yaml.org/type/bool.html>`_ tag
  yTagInteger* : TagId = 7.TagId ## \
    ## `!!int <http://yaml.org/type/int.html>`_ tag
  yTagFloat* : TagId = 8.TagId ## \
    ## `!!float <http://yaml.org/type/float.html>`_ tag
  # other language-independent YAML types (from http://yaml.org/type/ )
  yTagOrderedMap* : TagId = 9.TagId ## \
    ## `!!omap <http://yaml.org/type/omap.html>`_ tag
  yTagPairs* : TagId = 10.TagId ## \
    ## `!!pairs <http://yaml.org/type/pairs.html>`_ tag
  yTagSet* : TagId = 11.TagId ## \
    ## `!!set <http://yaml.org/type/set.html>`_ tag
  yTagBinary* : TagId = 12.TagId ## \
    ## `!!binary <http://yaml.org/type/binary.html>`_ tag
  yTagMerge* : TagId = 13.TagId ## \
    ## `!!merge <http://yaml.org/type/merge.html>`_ tag
  yTagTimestamp* : TagId = 14.TagId ## \
    ## `!!timestamp <http://yaml.org/type/timestamp.html>`_ tag
  yTagValue* : TagId = 15.TagId ## \
    ## `!!value <http://yaml.org/type/value.html>`_ tag
  yTagYaml* : TagId = 16.TagId ## \
    ## `!!yaml <http://yaml.org/type/yaml.html>`_ tag
  yTagNimField* : TagId = 100.TagId ## \
    ## This tag is used in serialization for the name of a field of an
    ## object. It may contain any string scalar that is a valid Nim symbol.
  yFirstStaticTagId* : TagId = 1000.TagId ## \
    ## The first ``TagId`` assigned by the ``setTagId`` templates.
  yFirstCustomTagId* : TagId = 10000.TagId ## \
    ## The first ``TagId`` which should be assigned to an URI that does not
    ## exist in the ``TagLibrary`` which is used for parsing.
  # URI prefixes; presumably prepended to tag suffixes such as `str` to form
  # full tag URIs like "tag:yaml.org,2002:str" - confirm against TagLibrary.
  yamlTagRepositoryPrefix* = "tag:yaml.org,2002:"
  nimyamlTagRepositoryPrefix* = "tag:nimyaml.org,2016:"
proc properties*(event: Event): Properties =
  ## Returns the node properties (anchor and tag) carried by ``event``.
  ##
  ## Only ``yamlStartMap``, ``yamlStartSeq`` and ``yamlScalar`` events carry
  ## properties; for any other kind a ``FieldDefect`` is raised.
  case event.kind
  of yamlScalar: event.scalarProperties
  of yamlStartSeq: event.seqProperties
  of yamlStartMap: event.mapProperties
  else:
    let message = "Event " & $event.kind & " has no properties"
    raise newException(FieldDefect, message)
proc collectionStyle*(event: Event): CollectionStyle =
  ## Returns the presentation style stored in a collection start event.
  ##
  ## Valid for ``yamlStartMap`` and ``yamlStartSeq`` only; any other event
  ## kind raises a ``FieldDefect``.
  case event.kind
  of yamlStartSeq: event.seqStyle
  of yamlStartMap: event.mapStyle
  else:
    let message = "Event " & $event.kind & " has no collectionStyle"
    raise newException(FieldDefect, message)
proc startDocEvent*(explicit: bool = false, startPos, endPos: Mark = defaultMark): Event
    {.inline, raises: [].} =
  ## Builds a ``yamlStartDoc`` event. ``explicit`` is stored in
  ## ``explicitDirectivesEnd``; ``version`` is left at its default (empty).
  Event(kind: yamlStartDoc,
        explicitDirectivesEnd: explicit,
        startPos: startPos,
        endPos: endPos)
proc endDocEvent*(explicit: bool = false, startPos, endPos: Mark = defaultMark): Event
    {.inline, raises: [].} =
  ## Builds a ``yamlEndDoc`` event. ``explicit`` is stored in
  ## ``explicitDocumentEnd``.
  Event(kind: yamlEndDoc,
        explicitDocumentEnd: explicit,
        startPos: startPos,
        endPos: endPos)
proc startMapEvent*(style: CollectionStyle, props: Properties,
                    startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
  ## creates a new event that marks the start of a YAML mapping
  ##
  ## ``props`` holds the anchor and tag of the mapping. ``startPos`` and
  ## ``endPos`` default to ``defaultMark``, like in all other event
  ## constructors of this module, so events that are not generated from
  ## input can be built without spelling out positions.
  result = Event(startPos: startPos, endPos: endPos,
                 kind: yamlStartMap, mapProperties: props,
                 mapStyle: style)

proc startMapEvent*(style: CollectionStyle,
                    tag: TagId = yTagQuestionMark,
                    anchor: Anchor = yAnchorNone,
                    startPos, endPos: Mark = defaultMark): Event {.inline.} =
  ## creates a new event that marks the start of a YAML mapping; convenience
  ## overload taking tag and anchor separately.
  ##
  ## Does not clash with the ``Properties`` overload: that one has no
  ## default for ``props``, so a call omitting it always resolves here.
  return startMapEvent(style, (anchor, tag), startPos, endPos)
proc endMapEvent*(startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
  ## Builds a ``yamlEndMap`` event spanning the given positions.
  Event(kind: yamlEndMap, startPos: startPos, endPos: endPos)
proc startSeqEvent*(style: CollectionStyle,
                    props: Properties,
                    startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
  ## Builds a ``yamlStartSeq`` event with the given style and node
  ## properties (anchor and tag).
  Event(kind: yamlStartSeq,
        seqStyle: style,
        seqProperties: props,
        startPos: startPos,
        endPos: endPos)

proc startSeqEvent*(style: CollectionStyle,
                    tag: TagId = yTagQuestionMark,
                    anchor: Anchor = yAnchorNone,
                    startPos, endPos: Mark = defaultMark): Event {.inline.} =
  ## Convenience overload taking tag and anchor separately; forwards to the
  ## ``Properties`` overload.
  let props: Properties = (anchor: anchor, tag: tag)
  startSeqEvent(style, props, startPos, endPos)
proc endSeqEvent*(startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
  ## Builds a ``yamlEndSeq`` event spanning the given positions.
  Event(kind: yamlEndSeq, startPos: startPos, endPos: endPos)
proc scalarEvent*(content: string, props: Properties,
                  style: ScalarStyle = ssAny,
                  startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
  ## Builds a ``yamlScalar`` event holding ``content`` together with its
  ## node properties (anchor and tag) and scalar style.
  Event(kind: yamlScalar,
        scalarContent: content,
        scalarStyle: style,
        scalarProperties: props,
        startPos: startPos,
        endPos: endPos)

proc scalarEvent*(content: string = "", tag: TagId = yTagQuestionMark,
                  anchor: Anchor = yAnchorNone,
                  style: ScalarStyle = ssAny,
                  startPos, endPos: Mark = defaultMark): Event {.inline.} =
  ## Convenience overload taking tag and anchor separately; forwards to the
  ## ``Properties`` overload.
  let props: Properties = (anchor: anchor, tag: tag)
  scalarEvent(content, props, style, startPos, endPos)
proc aliasEvent*(target: Anchor, startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
  ## Builds a ``yamlAlias`` event that refers to the anchor ``target``.
  Event(kind: yamlAlias,
        aliasTarget: target,
        startPos: startPos,
        endPos: endPos)
# Operators borrowed from the base types of the distinct types ``Anchor``
# (string) and ``TagId`` (int). ``$`` for ``TagId`` is not borrowed; it is
# implemented separately in this module.

# equality of two anchors, borrowed from string
proc `==`*(left, right: Anchor): bool {.borrow.}
# string representation of an anchor, borrowed from string
proc `$`*(id: Anchor): string {.borrow.}
# hash of an anchor, so that it can be used as key in hash tables
proc hash*(id: Anchor): Hash {.borrow.}
# equality of two tag ids, borrowed from int
proc `==`*(left, right: TagId): bool {.borrow.}
# hash of a tag id, so that it can be used as key in hash tables
proc hash*(id: TagId): Hash {.borrow.}
proc `$`*(id: TagId): string {.raises: [].} =
  ## Renders a ``TagId`` for display. The predefined ids of this module are
  ## shown in shorthand form (e.g. ``!!str``); every other id is shown as
  ## its numeric value in angle brackets, e.g. ``<1000>``.
  result =
    case id
    # non-specific tags
    of yTagExclamationMark: "!"
    of yTagQuestionMark: "?"
    # failsafe schema
    of yTagString: "!!str"
    of yTagSequence: "!!seq"
    of yTagMapping: "!!map"
    # json & core schema
    of yTagNull: "!!null"
    of yTagBoolean: "!!bool"
    of yTagInteger: "!!int"
    of yTagFloat: "!!float"
    # other language-independent types
    of yTagOrderedMap: "!!omap"
    of yTagPairs: "!!pairs"
    of yTagSet: "!!set"
    of yTagBinary: "!!binary"
    of yTagMerge: "!!merge"
    of yTagTimestamp: "!!timestamp"
    of yTagValue: "!!value"
    of yTagYaml: "!!yaml"
    # NimYAML specific
    of yTagNimField: "!nim:field"
    else:
      # no predefined name: fall back to the raw number
      "<" & $int(id) & ">"
proc `==`*(left: Event, right: Event): bool {.raises: [].} =
  ## Compares two events by kind and node content.
  ##
  ## Events of different kind are never equal. For events of the same kind:
  ##
  ## * map and sequence starts are equal if their properties are equal,
  ## * scalars are equal if properties and content are equal,
  ## * aliases are equal if their targets are equal,
  ## * all other kinds are always equal.
  ##
  ## Positions, collection / scalar styles and the document start / end
  ## fields are not taken into account.
  if left.kind != right.kind:
    return false
  result =
    case left.kind
    of yamlAlias:
      left.aliasTarget == right.aliasTarget
    of yamlScalar:
      left.scalarProperties == right.scalarProperties and
        left.scalarContent == right.scalarContent
    of yamlStartSeq:
      left.seqProperties == right.seqProperties
    of yamlStartMap:
      left.mapProperties == right.mapProperties
    of yamlStartStream, yamlEndStream, yamlStartDoc, yamlEndDoc,
       yamlEndMap, yamlEndSeq:
      true
proc renderAttrs*(props: Properties, isPlain: bool = true): string =
  ## Renders anchor and tag of ``props`` as a suffix for an event line.
  ##
  ## * An anchor other than ``yAnchorNone`` is emitted as `` &name``.
  ## * The ``?`` non-specific tag emits nothing.
  ## * The ``!`` non-specific tag emits `` <!>``, but only if ``isPlain``
  ##   is true.
  ## * Any other tag is emitted as `` <tag>`` using ``$`` of ``TagId``.
  result =
    if props.anchor == yAnchorNone: ""
    else: " &" & $props.anchor
  if props.tag == yTagQuestionMark:
    discard
  elif props.tag == yTagExclamationMark:
    if isPlain:
      result.add(" <!>")
  else:
    result.add(" <" & $props.tag & ">")
proc `$`*(event: Event): string {.raises: [].} =
  ## outputs a human-readable string describing the given event.
  ## This string is compatible to the format used in the yaml test suite.
  ##
  ## Stream, document and collection events are rendered as ``+STR``,
  ## ``-STR``, ``+DOC``, ``-DOC``, ``+MAP``, ``-MAP``, ``+SEQ``, ``-SEQ``;
  ## scalars as ``=VAL`` and aliases as ``=ALI``. Anchor and tag are
  ## appended via ``renderAttrs`` for events that carry properties.
  case event.kind
  of yamlStartStream: result = "+STR"
  of yamlEndStream: result = "-STR"
  of yamlEndMap: result = "-MAP"
  of yamlEndSeq: result = "-SEQ"
  of yamlStartDoc:
    result = "+DOC"
    if event.explicitDirectivesEnd: result &= " ---"
  of yamlEndDoc:
    result = "-DOC"
    if event.explicitDocumentEnd: result &= " ..."
  of yamlStartMap: result = "+MAP" & renderAttrs(event.mapProperties)
  # must read the field of the active variant branch: `mapProperties` does
  # not exist on a yamlStartSeq event and accessing it raises FieldDefect.
  of yamlStartSeq: result = "+SEQ" & renderAttrs(event.seqProperties)
  of yamlScalar:
    # ssAny is treated like a plain scalar, both for the attributes and
    # for the style indicator below
    result = "=VAL" & renderAttrs(event.scalarProperties,
                                  event.scalarStyle == ssPlain or
                                  event.scalarStyle == ssAny)
    # style indicator that precedes the escaped content
    case event.scalarStyle
    of ssPlain, ssAny: result &= " :"
    of ssSingleQuoted: result &= " \'"
    of ssDoubleQuoted: result &= " \""
    of ssLiteral: result &= " |"
    of ssFolded: result &= " >"
    result &= yamlTestSuiteEscape(event.scalarContent)
  of yamlAlias: result = "=ALI *" & $event.aliasTarget

File diff suppressed because it is too large Load Diff

View File

@ -5,69 +5,60 @@
# distribution, for details about the copyright. # distribution, for details about the copyright.
import lexbase, streams, strutils, unicode import lexbase, streams, strutils, unicode
import ../data
when defined(yamlDebug): when defined(yamlDebug):
import terminal import terminal
export terminal export terminal
when defined(yamlScalarRepInd):
type ScalarKind* = enum
skSingleQuoted, skDoubleQuoted, skLiteral, skFolded
type type
YamlLexerObj* = object Lexer* = object
cur*: LexerToken cur*: Token
curStartPos*, curEndPos*: tuple[line, column: int] curStartPos*, curEndPos*: Mark
# recently read scalar or URI, if any # recently read scalar or URI, if any
buf*: string evaluated*: string
# ltIndentation # ltIndentation
indentation*: int indentation*: int
when defined(yamlScalarRepInd):
# ltQuotedScalar, ltBlockScalarHeader
scalarKind*: ScalarKind
# internals # internals
source: BaseLexer source: BaseLexer
tokenStart: int tokenStart: int
flowDepth: int flowDepth: int
state, lineStartState, jsonEnablingState: LexerState state, lineStartState, jsonEnablingState: State
c: char c: char
seenMultiline: bool seenMultiline: bool
# indentation of recently started set of node properties. # indentation of recently started set of node properties.
# necessary for implicit keys with properties. # necessary for implicit keys with properties.
propertyIndentation: int propertyIndentation: int
YamlLexer* = ref YamlLexerObj LexerError* = object of ValueError
YamlLexerError* = object of ValueError
line*, column*: int line*, column*: int
lineContent*: string lineContent*: string
# temporarily missing .raises: [YamlLexerError] # temporarily missing .raises: [LexerError]
# due to https://github.com/nim-lang/Nim/issues/13905 # due to https://github.com/nim-lang/Nim/issues/13905
LexerState = proc(lex: YamlLexer): bool {.locks: 0, gcSafe.} State = proc(lex: var Lexer): bool {.locks: 0, gcSafe, nimcall.}
LexerToken* = enum Token* {.pure.} = enum
ltYamlDirective, # `%YAML` YamlDirective, # `%YAML`
ltTagDirective, # `%TAG` TagDirective, # `%TAG`
ltUnknownDirective, # any directive but `%YAML` and `%TAG` UnknownDirective, # any directive but `%YAML` and `%TAG`
ltDirectiveParam, # parameters of %YAML and unknown directives DirectiveParam, # parameters of %YAML and unknown directives
ltEmptyLine, # for line folding in multiline plain scalars EmptyLine, # for line folding in multiline plain scalars
ltDirectivesEnd, # explicit `---` DirectivesEnd, # explicit `---`
ltDocumentEnd, # explicit `...` DocumentEnd, # explicit `...`
ltStreamEnd, # end of input StreamEnd, # end of input
ltIndentation, # beginning of non-empty line Indentation, # beginning of non-empty line
ltPlainScalar, ltSingleQuotedScalar, ltDoubleQuotedScalar, Plain, SingleQuoted, DoubleQuoted, Literal, Folded,
ltLiteralScalar, ltFoldedScalar, SeqItemInd, # block sequence item indicator `- `
ltSeqItemInd, # block sequence item indicator `- ` MapKeyInd, # block mapping key indicator `? `
ltMapKeyInd, # block mapping key indicator `? ` MapValueInd # block mapping value indicator `: `
ltMapValueInd # block mapping value indicator `: ` MapStart, MapEnd, SeqStart, SeqEnd, SeqSep # {}[],
ltMapStart, ltMapEnd, ltSeqStart, ltSeqEnd, ltSeqSep # {}[], TagHandle, # a handle of a tag, e.g. `!!` of `!!str`
ltTagHandle, # a handle of a tag, e.g. `!!` of `!!str` Suffix, # suffix of a tag shorthand, e.g. `str` of `!!str`.
ltSuffix, # suffix of a tag shorthand, e.g. `str` of `!!str`.
# also used for the URI of the %TAG directive # also used for the URI of the %TAG directive
ltVerbatimTag, # a verbatim tag, e.g. `!<tag:yaml.org,2002:str>` VerbatimTag, # a verbatim tag, e.g. `!<tag:yaml.org,2002:str>`
ltAnchor, # anchor property of a node, e.g. `&anchor` Anchor, # anchor property of a node, e.g. `&anchor`
ltAlias # alias node, e.g. `*alias` Alias # alias node, e.g. `*alias`
ChompType* = enum ChompType* = enum
ctKeep, ctClip, ctStrip ctKeep, ctClip, ctStrip
@ -91,7 +82,9 @@ const
tagShorthandChars = {'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-'} tagShorthandChars = {'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-'}
suffixChars = {'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', '@', suffixChars = {'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', '@',
'&', '=', '+', '$', '_', '.', '!', '~', '*', '\'', '-'} '&', '=', '+', '$', '_', '.', '!', '~', '*', '\'', '-'}
nodePropertyKind = {ltTagHandle, ltVerbatimTag, ltAnchor} nodePropertyKind* = {Token.TagHandle, Token.VerbatimTag, Token.Anchor}
scalarTokenKind* = {Token.Plain, Token.SingleQuoted, Token.DoubleQuoted,
Token.Literal, Token.Folded}
UTF8NextLine = toUTF8(0x85.Rune) UTF8NextLine = toUTF8(0x85.Rune)
UTF8NonBreakingSpace = toUTF8(0xA0.Rune) UTF8NonBreakingSpace = toUTF8(0xA0.Rune)
@ -102,11 +95,11 @@ const
# lexer source handling # lexer source handling
proc advance(lex: YamlLexer, step: int = 1) {.inline.} = proc advance(lex: var Lexer, step: int = 1) {.inline.} =
lex.source.bufpos.inc(step) lex.source.bufpos.inc(step)
lex.c = lex.source.buf[lex.source.bufpos] lex.c = lex.source.buf[lex.source.bufpos]
template lexCR(lex: YamlLexer) = template lexCR(lex: var Lexer) =
try: lex.source.bufpos = lex.source.handleCR(lex.source.bufpos) try: lex.source.bufpos = lex.source.handleCR(lex.source.bufpos)
except: except:
var e = lex.generateError("Encountered stream error: " & var e = lex.generateError("Encountered stream error: " &
@ -115,7 +108,7 @@ template lexCR(lex: YamlLexer) =
raise e raise e
lex.c = lex.source.buf[lex.source.bufpos] lex.c = lex.source.buf[lex.source.bufpos]
template lexLF(lex: YamlLexer) = template lexLF(lex: var Lexer) =
try: lex.source.bufpos = lex.source.handleLF(lex.source.bufpos) try: lex.source.bufpos = lex.source.handleLF(lex.source.bufpos)
except: except:
var e = generateError(lex, "Encountered stream error: " & var e = generateError(lex, "Encountered stream error: " &
@ -124,22 +117,22 @@ template lexLF(lex: YamlLexer) =
raise e raise e
lex.c = lex.source.buf[lex.source.bufpos] lex.c = lex.source.buf[lex.source.bufpos]
template lineNumber(lex: YamlLexer): int = template lineNumber(lex: Lexer): Positive =
lex.source.lineNumber lex.source.lineNumber
template columnNumber(lex: YamlLexer): int = template columnNumber(lex: Lexer): Positive =
lex.source.getColNumber(lex.source.bufpos) + 1 lex.source.getColNumber(lex.source.bufpos) + 1
template currentLine(lex: YamlLexer): string = template currentLine(lex: Lexer): string =
lex.source.getCurrentLine(true) lex.source.getCurrentLine(true)
proc Safe(lex: YamlLexer): bool {.inline.} = proc isPlainSafe(lex: Lexer): bool {.inline.} =
case lex.source.buf[lex.source.bufpos + 1] case lex.source.buf[lex.source.bufpos + 1]
of spaceOrLineEnd: result = false of spaceOrLineEnd: result = false
of flowIndicators: result = lex.flowDepth == 0 of flowIndicators: result = lex.flowDepth == 0
else: result = true else: result = true
proc lineWithMarker(lex: YamlLexer, pos: tuple[line, column: int], proc lineWithMarker(lex: Lexer, pos: tuple[line, column: int],
marker: bool): string = marker: bool): string =
if pos.line == lex.source.lineNumber: if pos.line == lex.source.lineNumber:
result = lex.source.getCurrentLine(false) result = lex.source.getCurrentLine(false)
@ -150,25 +143,25 @@ proc lineWithMarker(lex: YamlLexer, pos: tuple[line, column: int],
{.push gcSafe, locks: 0.} {.push gcSafe, locks: 0.}
# `raises` cannot be pushed. # `raises` cannot be pushed.
proc outsideDoc(lex: YamlLexer): bool {.raises: [].} proc outsideDoc(lex: var Lexer): bool {.raises: [].}
proc yamlVersion(lex: YamlLexer): bool {.raises: YamlLexerError.} proc yamlVersion(lex: var Lexer): bool {.raises: LexerError.}
proc tagShorthand(lex: YamlLexer): bool {.raises: YamlLexerError.} proc tagShorthand(lex: var Lexer): bool {.raises: LexerError.}
proc tagUri(lex: YamlLexer): bool {.raises: YamlLexerError.} proc tagUri(lex: var Lexer): bool {.raises: LexerError.}
proc unknownDirParams(lex: YamlLexer): bool {.raises: [].} proc unknownDirParams(lex: var Lexer): bool {.raises: [].}
proc expectLineEnd(lex: YamlLexer): bool {.raises: YamlLexerError.} proc expectLineEnd(lex: var Lexer): bool {.raises: LexerError.}
proc lineStart(lex: YamlLexer): bool {.raises: YamlLexerError.} proc lineStart(lex: var Lexer): bool {.raises: LexerError.}
proc flowLineStart(lex: YamlLexer): bool {.raises: YamlLexerError.} proc flowLineStart(lex: var Lexer): bool {.raises: LexerError.}
proc flowLineIndentation(lex: YamlLexer): bool {.raises: YamlLexerError.} proc flowLineIndentation(lex: var Lexer): bool {.raises: LexerError.}
proc insideLine(lex: YamlLexer): bool {.raises: YamlLexerError.} proc insideLine(lex: var Lexer): bool {.raises: LexerError.}
proc indentationSettingToken(lex: YamlLexer): bool {.raises: YamlLexerError.} proc indentationSettingToken(lex: var Lexer): bool {.raises: LexerError.}
proc afterToken(lex: YamlLexer): bool {.raises: YamlLexerError.} proc afterToken(lex: var Lexer): bool {.raises: LexerError.}
proc beforeIndentationSettingToken(lex: YamlLexer): bool {.raises: YamlLexerError.} proc beforeIndentationSettingToken(lex: var Lexer): bool {.raises: LexerError.}
proc afterJsonEnablingToken(lex: YamlLexer): bool {.raises: YamlLexerError.} proc afterJsonEnablingToken(lex: var Lexer): bool {.raises: LexerError.}
proc lineIndentation(lex: YamlLexer): bool {.raises: [].} proc lineIndentation(lex: var Lexer): bool {.raises: [].}
proc lineDirEnd(lex: YamlLexer): bool {.raises: [].} proc lineDirEnd(lex: var Lexer): bool {.raises: [].}
proc lineDocEnd(lex: YamlLexer): bool {.raises: [].} proc lineDocEnd(lex: var Lexer): bool {.raises: [].}
proc atSuffix(lex: YamlLexer): bool {.raises: [].} proc atSuffix(lex: var Lexer): bool {.raises: [].}
proc streamEnd(lex: YamlLexer): bool {.raises: [].} proc streamEnd(lex: var Lexer): bool {.raises: [].}
{.pop.} {.pop.}
# helpers # helpers
@ -178,28 +171,28 @@ template debug(message: string) {.dirty.} =
try: styledWriteLine(stdout, fgBlue, message) try: styledWriteLine(stdout, fgBlue, message)
except IOError: discard except IOError: discard
proc generateError(lex: YamlLexer, message: string): proc generateError(lex: Lexer, message: string):
ref YamlLexerError {.raises: [].} = ref LexerError {.raises: [].} =
result = newException(YamlLexerError, message) result = newException(LexerError, message)
result.line = lex.lineNumber() result.line = lex.lineNumber()
result.column = lex.columnNumber() result.column = lex.columnNumber()
result.lineContent = lex.currentLine() result.lineContent = lex.currentLine()
proc startToken(lex: YamlLexer) {.inline.} = proc startToken(lex: var Lexer) {.inline.} =
lex.curStartPos = (lex.lineNumber(), lex.columnNumber()) lex.curStartPos = (line: lex.lineNumber(), column: lex.columnNumber())
lex.tokenStart = lex.source.bufpos lex.tokenStart = lex.source.bufpos
proc endToken(lex: YamlLexer) {.inline.} = proc endToken(lex: var Lexer) {.inline.} =
lex.curEndPos = (lex.lineNumber(), lex.columnNumber()) lex.curEndPos = (line: lex.lineNumber(), column: lex.columnNumber())
proc readNumericSubtoken(lex: YamlLexer) {.inline.} = proc readNumericSubtoken(lex: var Lexer) {.inline.} =
if lex.c notin digits: if lex.c notin digits:
raise lex.generateError("Illegal character in YAML version string: " & escape("" & lex.c)) raise lex.generateError("Illegal character in YAML version string: " & escape("" & lex.c))
while true: while true:
lex.advance() lex.advance()
if lex.c notin digits: break if lex.c notin digits: break
proc isDirectivesEnd(lex: YamlLexer): bool = proc isDirectivesEnd(lex: var Lexer): bool =
var peek = lex.source.bufpos var peek = lex.source.bufpos
if lex.source.buf[peek] == '-': if lex.source.buf[peek] == '-':
peek += 1 peek += 1
@ -211,7 +204,7 @@ proc isDirectivesEnd(lex: YamlLexer): bool =
return true return true
return false return false
proc isDocumentEnd(lex: YamlLexer): bool = proc isDocumentEnd(lex: var Lexer): bool =
var peek = lex.source.bufpos var peek = lex.source.bufpos
if lex.source.buf[peek] == '.': if lex.source.buf[peek] == '.':
peek += 1 peek += 1
@ -223,7 +216,7 @@ proc isDocumentEnd(lex: YamlLexer): bool =
return true return true
return false return false
proc readHexSequence(lex: YamlLexer, len: int) = proc readHexSequence(lex: var Lexer, len: int) =
var charPos = 0 var charPos = 0
let startPos = lex.source.bufpos let startPos = lex.source.bufpos
for i in countup(0, len-1): for i in countup(0, len-1):
@ -244,10 +237,10 @@ proc readHexSequence(lex: YamlLexer, len: int) =
charPos += coeff * (int(lex.c) - int('A') + 10) charPos += coeff * (int(lex.c) - int('A') + 10)
else: discard # cannot happen, we checked else: discard # cannot happen, we checked
coeff = coeff div 16 coeff = coeff div 16
lex.buf.add($Rune(charPos)) lex.evaluated.add($Rune(charPos))
proc readURI(lex: YamlLexer) = proc readURI(lex: var Lexer) =
lex.buf.setLen(0) lex.evaluated.setLen(0)
let endWithSpace = lex.c != '<' let endWithSpace = lex.c != '<'
let restricted = lex.flowDepth > 0 let restricted = lex.flowDepth > 0
var literalStart: int var literalStart: int
@ -262,17 +255,17 @@ proc readURI(lex: YamlLexer) =
case lex.c case lex.c
of spaceOrLineEnd: of spaceOrLineEnd:
if endWithSpace: if endWithSpace:
lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
break break
raise lex.generateError("Unclosed verbatim tag") raise lex.generateError("Unclosed verbatim tag")
of '%': of '%':
lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
lex.readHexSequence(2) lex.readHexSequence(2)
literalStart = lex.source.bufpos literalStart = lex.source.bufpos
of uriChars: discard of uriChars: discard
of '[', ']', ',': of '[', ']', ',':
if restricted: if restricted:
lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
break break
of '!': of '!':
if restricted: if restricted:
@ -280,14 +273,14 @@ proc readURI(lex: YamlLexer) =
of '>': of '>':
if endWithSpace: if endWithSpace:
raise lex.generateError("Illegal character in URI: `>`") raise lex.generateError("Illegal character in URI: `>`")
lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
lex.advance() lex.advance()
break break
else: else:
raise lex.generateError("Illegal character in URI: " & escape("" & lex.c)) raise lex.generateError("Illegal character in URI: " & escape("" & lex.c))
lex.advance() lex.advance()
proc endLine(lex: YamlLexer) = proc endLine(lex: var Lexer) =
while true: while true:
case lex.c case lex.c
of '\l': of '\l':
@ -307,7 +300,7 @@ proc endLine(lex: YamlLexer) =
if lex.c in lineEnd: break if lex.c in lineEnd: break
else: discard else: discard
proc startLine(lex: YamlLexer): LineStartType = proc startLine(lex: var Lexer): LineStartType =
case lex.c case lex.c
of '-': of '-':
return if lex.isDirectivesEnd(): lsDirectivesEndMarker return if lex.isDirectivesEnd(): lsDirectivesEndMarker
@ -323,8 +316,8 @@ proc startLine(lex: YamlLexer): LineStartType =
of EndOfFile: lsStreamEnd of EndOfFile: lsStreamEnd
else: lsContent else: lsContent
proc readPlainScalar(lex: YamlLexer) = proc readPlainScalar(lex: var Lexer) =
lex.buf.setLen(0) lex.evaluated.setLen(0)
let afterNewlineState = if lex.flowDepth == 0: lineIndentation let afterNewlineState = if lex.flowDepth == 0: lineIndentation
else: flowLineIndentation else: flowLineIndentation
var lineStartPos: int var lineStartPos: int
@ -333,7 +326,7 @@ proc readPlainScalar(lex: YamlLexer) =
if lex.propertyIndentation != -1: if lex.propertyIndentation != -1:
lex.indentation = lex.propertyIndentation lex.indentation = lex.propertyIndentation
lex.propertyIndentation = -1 lex.propertyIndentation = -1
lex.cur = ltPlainScalar lex.cur = Token.Plain
block multilineLoop: block multilineLoop:
while true: while true:
lineStartPos = lex.source.bufpos - 1 lineStartPos = lex.source.bufpos - 1
@ -348,48 +341,48 @@ proc readPlainScalar(lex: YamlLexer) =
lex.advance() lex.advance()
case lex.c case lex.c
of '\l', '\c': of '\l', '\c':
lex.buf.add(lex.source.buf[lineStartPos..contentEnd]) lex.evaluated.add(lex.source.buf[lineStartPos..contentEnd])
break inlineLoop break inlineLoop
of EndOfFile: of EndOfFile:
lex.buf.add(lex.source.buf[lineStartPos..contentEnd]) lex.evaluated.add(lex.source.buf[lineStartPos..contentEnd])
lex.state = streamEnd lex.state = streamEnd
break multilineLoop break multilineLoop
of '#': of '#':
lex.buf.add(lex.source.buf[lineStartPos..contentEnd]) lex.evaluated.add(lex.source.buf[lineStartPos..contentEnd])
lex.state = expectLineEnd lex.state = expectLineEnd
break multilineLoop break multilineLoop
of ':': of ':':
if not lex.Safe(): if not lex.isPlainSafe():
lex.buf.add(lex.source.buf[lineStartPos..contentEnd]) lex.evaluated.add(lex.source.buf[lineStartPos..contentEnd])
lex.state = insideLine lex.state = insideLine
break multilineLoop break multilineLoop
break spaceLoop break spaceLoop
of flowIndicators: of flowIndicators:
if lex.flowDepth > 0: if lex.flowDepth > 0:
lex.buf.add(lex.source.buf[lineStartPos..contentEnd]) lex.evaluated.add(lex.source.buf[lineStartPos..contentEnd])
lex.state = insideLine lex.state = insideLine
break multilineLoop break multilineLoop
break spaceLoop break spaceLoop
of ' ': discard of ' ': discard
else: break spaceLoop else: break spaceLoop
of ':': of ':':
if not lex.Safe(): if not lex.isPlainSafe():
lex.buf.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2])
lex.endToken() lex.endToken()
lex.state = insideLine lex.state = insideLine
break multilineLoop break multilineLoop
of flowIndicators: of flowIndicators:
if lex.flowDepth > 0: if lex.flowDepth > 0:
lex.buf.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2])
lex.endToken() lex.endToken()
lex.state = insideLine lex.state = insideLine
break multilineLoop break multilineLoop
of '\l', '\c': of '\l', '\c':
lex.buf.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2])
lex.endToken() lex.endToken()
break inlineLoop break inlineLoop
of EndOfFile: of EndOfFile:
lex.buf.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2])
if lex.columnNumber() > 0: if lex.columnNumber() > 0:
lex.endToken() lex.endToken()
lex.state = streamEnd lex.state = streamEnd
@ -419,31 +412,31 @@ proc readPlainScalar(lex: YamlLexer) =
break multilineLoop break multilineLoop
of lsNewline: lex.endLine() of lsNewline: lex.endLine()
newlines += 1 newlines += 1
if (lex.c == ':' and not lex.Safe()) or if (lex.c == ':' and not lex.isPlainSafe()) or
lex.c == '#' or (lex.c in flowIndicators and lex.c == '#' or (lex.c in flowIndicators and
lex.flowDepth > 0): lex.flowDepth > 0):
lex.state = afterNewlineState lex.state = afterNewlineState
break multilineLoop break multilineLoop
lex.seenMultiline = true lex.seenMultiline = true
if newlines == 1: lex.buf.add(' ') if newlines == 1: lex.evaluated.add(' ')
else: else:
for i in countup(2, newlines): lex.buf.add('\l') for i in countup(2, newlines): lex.evaluated.add('\l')
proc streamEndAfterBlock(lex: YamlLexer) = proc streamEndAfterBlock(lex: var Lexer) =
if lex.columnNumber() != 0: if lex.columnNumber() != 0:
lex.endToken() lex.endToken()
lex.curEndPos.column -= 1 lex.curEndPos.column -= 1
proc readBlockScalar(lex: YamlLexer) = proc readBlockScalar(lex: var Lexer) =
var var
chomp = ctClip chomp = ctClip
indent = 0 indent = 0
separationLines = 0 separationLines = 0
contentStart: int contentStart: int
lex.startToken() lex.startToken()
lex.cur = if lex.c == '>': ltFoldedScalar else: ltLiteralScalar lex.cur = if lex.c == '>': Token.Folded else: Token.Literal
lex.buf.setLen(0) lex.evaluated.setLen(0)
# header # header
while true: while true:
@ -506,12 +499,12 @@ proc readBlockScalar(lex: YamlLexer) =
elif lex.columnNumber < indent: break body elif lex.columnNumber < indent: break body
break break
for i in countup(0, separationLines - 1): for i in countup(0, separationLines - 1):
lex.buf.add('\l') lex.evaluated.add('\l')
block content: block content:
contentStart = lex.source.bufpos - 1 contentStart = lex.source.bufpos - 1
while lex.c notin lineEnd: lex.advance() while lex.c notin lineEnd: lex.advance()
lex.buf.add(lex.buf[contentStart .. lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[contentStart .. lex.source.bufpos - 2])
separationLines = 0 separationLines = 0
if lex.c == EndOfFile: if lex.c == EndOfFile:
lex.state = streamEnd lex.state = streamEnd
@ -540,14 +533,14 @@ proc readBlockScalar(lex: YamlLexer) =
else: break else: break
# line folding # line folding
if lex.cur == ltLiteralScalar: if lex.cur == Token.Literal:
for i in countup(0, separationLines - 1): for i in countup(0, separationLines - 1):
lex.buf.add('\l') lex.evaluated.add('\l')
elif separationLines == 1: elif separationLines == 1:
lex.buf.add(' ') lex.evaluated.add(' ')
else: else:
for i in countup(0, separationLines - 2): for i in countup(0, separationLines - 2):
lex.buf.add('\l') lex.evaluated.add('\l')
if lex.columnNumber() > max(0, lex.indentation): if lex.columnNumber() > max(0, lex.indentation):
if lex.c == '#': if lex.c == '#':
@ -564,13 +557,13 @@ proc readBlockScalar(lex: YamlLexer) =
case chomp case chomp
of ctStrip: discard of ctStrip: discard
of ctClip: of ctClip:
if len(lex.buf) > 0: if len(lex.evaluated) > 0:
lex.buf.add('\l') lex.evaluated.add('\l')
of ctKeep: of ctKeep:
for i in countup(0, separationLines - 1): for i in countup(0, separationLines - 1):
lex.buf.add('\l') lex.evaluated.add('\l')
proc processQuotedWhitespace(lex: YamlLexer, initial: int) = proc processQuotedWhitespace(lex: var Lexer, initial: int) =
var newlines = initial var newlines = initial
let firstSpace = lex.source.bufpos - 1 let firstSpace = lex.source.bufpos - 1
while true: while true:
@ -583,7 +576,7 @@ proc processQuotedWhitespace(lex: YamlLexer, initial: int) =
lex.lexCR() lex.lexCR()
break break
else: else:
lex.buf.add(lex.source.buf[firstSpace..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[firstSpace..lex.source.bufpos - 2])
return return
lex.advance() lex.advance()
lex.seenMultiline = true lex.seenMultiline = true
@ -599,14 +592,14 @@ proc processQuotedWhitespace(lex: YamlLexer, initial: int) =
raise lex.generateError("Unclosed quoted string") raise lex.generateError("Unclosed quoted string")
newlines += 1 newlines += 1
if newlines == 0: discard if newlines == 0: discard
elif newlines == 1: lex.buf.add(' ') elif newlines == 1: lex.evaluated.add(' ')
else: else:
for i in countup(2, newlines): lex.buf.add('\l') for i in countup(2, newlines): lex.evaluated.add('\l')
proc readSingleQuotedScalar(lex: YamlLexer) = proc readSingleQuotedScalar(lex: var Lexer) =
lex.seenMultiline = false lex.seenMultiline = false
lex.startToken() lex.startToken()
lex.buf.setLen(0) lex.evaluated.setLen(0)
if lex.propertyIndentation != -1: if lex.propertyIndentation != -1:
lex.indentation = lex.propertyIndentation lex.indentation = lex.propertyIndentation
lex.propertyIndentation = -1 lex.propertyIndentation = -1
@ -617,26 +610,26 @@ proc readSingleQuotedScalar(lex: YamlLexer) =
of EndOfFile: of EndOfFile:
raise lex.generateError("Unclosed quoted string") raise lex.generateError("Unclosed quoted string")
of '\'': of '\'':
lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
lex.advance() lex.advance()
if lex.c == '\'': if lex.c == '\'':
lex.buf.add('\'') lex.evaluated.add('\'')
literalStart = lex.source.bufpos literalStart = lex.source.bufpos
lex.advance() lex.advance()
else: break else: break
of ' ', '\l', '\c': of ' ', '\l', '\c':
lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
lex.processQuotedWhitespace(1) lex.processQuotedWhitespace(1)
literalStart = lex.source.bufpos - 1 literalStart = lex.source.bufpos - 1
else: else:
lex.advance() lex.advance()
lex.endToken() lex.endToken()
lex.cur = ltSingleQuotedScalar lex.cur = Token.SingleQuoted
proc readDoubleQuotedScalar(lex: YamlLexer) = proc readDoubleQuotedScalar(lex: var Lexer) =
lex.seenMultiline = false lex.seenMultiline = false
lex.startToken() lex.startToken()
lex.buf.setLen(0) lex.evaluated.setLen(0)
if lex.propertyIndentation != -1: if lex.propertyIndentation != -1:
lex.indentation = lex.propertyIndentation lex.indentation = lex.propertyIndentation
lex.propertyIndentation = -1 lex.propertyIndentation = -1
@ -647,27 +640,27 @@ proc readDoubleQuotedScalar(lex: YamlLexer) =
of EndOfFile: of EndOfFile:
raise lex.generateError("Unclosed quoted string") raise lex.generateError("Unclosed quoted string")
of '\\': of '\\':
lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
lex.advance() lex.advance()
literalStart = lex.source.bufpos literalStart = lex.source.bufpos
case lex.c case lex.c
of '0': lex.buf.add('\0') of '0': lex.evaluated.add('\0')
of 'a': lex.buf.add('\a') of 'a': lex.evaluated.add('\a')
of 'b': lex.buf.add('\b') of 'b': lex.evaluated.add('\b')
of 't', '\t': lex.buf.add('\t') of 't', '\t': lex.evaluated.add('\t')
of 'n': lex.buf.add('\l') of 'n': lex.evaluated.add('\l')
of 'v': lex.buf.add('\v') of 'v': lex.evaluated.add('\v')
of 'f': lex.buf.add('\f') of 'f': lex.evaluated.add('\f')
of 'r': lex.buf.add('\c') of 'r': lex.evaluated.add('\c')
of 'e': lex.buf.add('\e') of 'e': lex.evaluated.add('\e')
of ' ': lex.buf.add(' ') of ' ': lex.evaluated.add(' ')
of '"': lex.buf.add('"') of '"': lex.evaluated.add('"')
of '/': lex.buf.add('/') of '/': lex.evaluated.add('/')
of '\\':lex.buf.add('\\') of '\\':lex.evaluated.add('\\')
of 'N': lex.buf.add(UTF8NextLine) of 'N': lex.evaluated.add(UTF8NextLine)
of '_': lex.buf.add(UTF8NonBreakingSpace) of '_': lex.evaluated.add(UTF8NonBreakingSpace)
of 'L': lex.buf.add(UTF8LineSeparator) of 'L': lex.evaluated.add(UTF8LineSeparator)
of 'P': lex.buf.add(UTF8ParagraphSeparator) of 'P': lex.evaluated.add(UTF8ParagraphSeparator)
of 'x': of 'x':
lex.readHexSequence(2) lex.readHexSequence(2)
literalStart = lex.source.bufpos literalStart = lex.source.bufpos
@ -684,10 +677,10 @@ proc readDoubleQuotedScalar(lex: YamlLexer) =
else: else:
raise lex.generateError("Illegal character in escape sequence: " & escape("" & lex.c)) raise lex.generateError("Illegal character in escape sequence: " & escape("" & lex.c))
of '"': of '"':
lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
break break
of ' ', '\l', '\c': of ' ', '\l', '\c':
lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2])
lex.processQuotedWhitespace(1) lex.processQuotedWhitespace(1)
literalStart = lex.source.bufpos - 1 literalStart = lex.source.bufpos - 1
continue continue
@ -695,46 +688,49 @@ proc readDoubleQuotedScalar(lex: YamlLexer) =
lex.advance() lex.advance()
lex.advance() lex.advance()
lex.endToken() lex.endToken()
lex.cur = ltDoubleQuotedScalar lex.cur = Token.DoubleQuoted
proc basicInit(lex: var Lexer) =
  ## Resets the lexer's bookkeeping to its start-of-stream values and reads
  ## the first character. The input source must already be opened.
  # Scalar content buffer and nesting/indentation counters.
  lex.evaluated = ""
  lex.flowDepth = 0
  lex.propertyIndentation = -1
  # State procs: lexing starts outside of any document.
  lex.state = outsideDoc
  lex.lineStartState = outsideDoc
  lex.jsonEnablingState = afterToken
  # Load the first character; done last, after all fields are set.
  advance(lex)
# interface # interface
proc lastScalarWasMultiline*(lex: Lexer): bool =
  ## Returns the lexer's ``seenMultiline`` flag. The scalar readers reset it
  ## when they start and set it once they process a line break.
  lex.seenMultiline
proc shortLexeme*(lex: Lexer): string =
  ## Returns the source buffer slice from ``tokenStart`` up to and including
  ## index ``bufpos - 2``.
  let lastIdx = lex.source.bufpos - 2
  result = lex.source.buf[lex.tokenStart .. lastIdx]
proc fullLexeme*(lex: Lexer): string =
  ## Returns the source buffer slice from ``tokenStart - 1`` up to and
  ## including index ``bufpos - 2``, i.e. one character more at the front
  ## than ``shortLexeme``.
  let
    firstIdx = lex.tokenStart - 1
    lastIdx = lex.source.bufpos - 2
  result = lex.source.buf[firstIdx .. lastIdx]
proc currentLine*(lex: Lexer): string =
  ## Returns the text of the line the underlying source is currently
  ## positioned on, requested without the position marker
  ## (``getCurrentLine(false)``).
  result = getCurrentLine(lex.source, false)
proc next*(lex: var Lexer) =
  ## Advances the lexer to the next token.
  ##
  ## The proc stored in ``lex.state`` is called repeatedly until one call
  ## returns ``true``. The field is re-read on every iteration, so a state
  ## may replace it. The resulting token kind is then logged from ``lex.cur``.
  while true:
    if lex.state(lex): break
  debug("lexer -> " & $lex.cur)
proc init*(lex: var Lexer, source: Stream) {.raises: [IOError, OSError].} =
  ## Initializes ``lex`` to read its input from the stream ``source``.
  ##
  ## Opens the underlying source on the stream and then resets all lexer
  ## state, which also reads the first character.
  open(lex.source, source)
  basicInit(lex)
proc init*(lex: var Lexer, source: string) {.raises: [].} =
  ## Initializes ``lex`` to read its input from the string ``source``.
  ##
  ## The string is wrapped in a ``StringStream`` before the underlying source
  ## is opened on it. Afterwards all lexer state is reset, which also reads
  ## the first character.
  try:
    lex.source.open(newStringStream(source))
  except CatchableError:
    # Only catchable errors are swallowed here; a bare ``except:`` would also
    # hide Defects (programming errors), which should still surface.
    discard # can never happen with StringStream
  lex.basicInit()
# states # states
proc outsideDoc(lex: YamlLexer): bool = proc outsideDoc(lex: var Lexer): bool =
case lex.c case lex.c
of '%': of '%':
lex.startToken() lex.startToken()
@ -746,23 +742,23 @@ proc outsideDoc(lex: YamlLexer): bool =
case name case name
of "YAML": of "YAML":
lex.state = yamlVersion lex.state = yamlVersion
lex.cur = ltYamlDirective lex.cur = Token.YamlDirective
of "TAG": of "TAG":
lex.state = tagShorthand lex.state = tagShorthand
lex.cur = ltTagDirective lex.cur = Token.TagDirective
else: else:
lex.state = unknownDirParams lex.state = unknownDirParams
lex.cur = ltUnknownDirective lex.cur = Token.UnknownDirective
lex.buf.setLen(0) lex.evaluated.setLen(0)
lex.buf.add(name) lex.evaluated.add(name)
of '-': of '-':
lex.startToken() lex.startToken()
if lex.isDirectivesEnd(): if lex.isDirectivesEnd():
lex.state = expectLineEnd lex.state = expectLineEnd
lex.cur = ltDocumentEnd lex.cur = Token.DocumentEnd
else: else:
lex.state = indentationSettingToken lex.state = indentationSettingToken
lex.cur = ltIndentation lex.cur = Token.Indentation
lex.lineStartState = lineStart lex.lineStartState = lineStart
lex.indentation = -1 lex.indentation = -1
lex.endToken() lex.endToken()
@ -770,12 +766,12 @@ proc outsideDoc(lex: YamlLexer): bool =
lex.startToken() lex.startToken()
if lex.isDocumentEnd(): if lex.isDocumentEnd():
lex.state = expectLineEnd lex.state = expectLineEnd
lex.cur = ltDocumentEnd lex.cur = Token.DocumentEnd
else: else:
lex.state = indentationSettingToken lex.state = indentationSettingToken
lex.lineStartState = lineStart lex.lineStartState = lineStart
lex.indentation = -1 lex.indentation = -1
lex.cur = ltIndentation lex.cur = Token.Indentation
lex.endToken() lex.endToken()
else: else:
lex.startToken() lex.startToken()
@ -784,12 +780,12 @@ proc outsideDoc(lex: YamlLexer): bool =
lex.state = expectLineEnd lex.state = expectLineEnd
return false return false
lex.endToken() lex.endToken()
lex.cur = ltIndentation lex.cur = Token.Indentation
lex.state = indentationSettingToken lex.state = indentationSettingToken
lex.lineStartState = lineStart lex.lineStartState = lineStart
return true return true
proc yamlVersion(lex: YamlLexer): bool = proc yamlVersion(lex: var Lexer): bool =
debug("lex: yamlVersion") debug("lex: yamlVersion")
while lex.c in space: lex.advance() while lex.c in space: lex.advance()
lex.startToken() lex.startToken()
@ -800,11 +796,11 @@ proc yamlVersion(lex: YamlLexer): bool =
lex.readNumericSubtoken() lex.readNumericSubtoken()
if lex.c notin spaceOrLineEnd: if lex.c notin spaceOrLineEnd:
raise lex.generateError("Illegal character in YAML version string: " & escape("" & lex.c)) raise lex.generateError("Illegal character in YAML version string: " & escape("" & lex.c))
lex.cur = ltDirectiveParam lex.cur = Token.DirectiveParam
lex.endToken() lex.endToken()
lex.state = expectLineEnd lex.state = expectLineEnd
proc tagShorthand(lex: YamlLexer): bool = proc tagShorthand(lex: var Lexer): bool =
debug("lex: tagShorthand") debug("lex: tagShorthand")
while lex.c in space: lex.advance() while lex.c in space: lex.advance()
if lex.c != '!': if lex.c != '!':
@ -823,23 +819,23 @@ proc tagShorthand(lex: YamlLexer): bool =
lex.advance() lex.advance()
if lex.c notin spaceOrLineEnd: if lex.c notin spaceOrLineEnd:
raise lex.generateError("Missing space after tag shorthand") raise lex.generateError("Missing space after tag shorthand")
lex.cur = ltTagHandle lex.cur = Token.TagHandle
lex.endToken() lex.endToken()
lex.state = tagUri lex.state = tagUri
proc tagUri(lex: var Lexer): bool =
  ## Lexer state that reads a tag URI and emits it as ``Token.Suffix``.
  ##
  ## Leading whitespace is skipped first. A URI starting with ``'<'`` is
  ## rejected with a lexer error. The following state is ``expectLineEnd``.
  ## Always returns ``true`` (a token was produced).
  debug("lex: tagUri")
  while lex.c in space:
    lex.advance()
  lex.startToken()
  if lex.c == '<':
    raise lex.generateError("Illegal character in tag URI: " & escape("" & lex.c))
  lex.readUri()
  lex.cur = Token.Suffix
  lex.endToken()
  lex.state = expectLineEnd
  result = true
proc unknownDirParams(lex: YamlLexer): bool = proc unknownDirParams(lex: var Lexer): bool =
debug("lex: unknownDirParams") debug("lex: unknownDirParams")
while lex.c in space: lex.advance() while lex.c in space: lex.advance()
if lex.c in lineEnd + {'#'}: if lex.c in lineEnd + {'#'}:
@ -849,10 +845,10 @@ proc unknownDirParams(lex: YamlLexer): bool =
while true: while true:
lex.advance() lex.advance()
if lex.c in lineEnd + {'#'}: break if lex.c in lineEnd + {'#'}: break
lex.cur = ltDirectiveParam lex.cur = Token.DirectiveParam
return true return true
proc expectLineEnd(lex: YamlLexer): bool = proc expectLineEnd(lex: var Lexer): bool =
debug("lex: expectLineEnd") debug("lex: expectLineEnd")
while lex.c in space: lex.advance() while lex.c in space: lex.advance()
if lex.c notin commentOrLineEnd: if lex.c notin commentOrLineEnd:
@ -860,7 +856,7 @@ proc expectLineEnd(lex: YamlLexer): bool =
lex.endLine() lex.endLine()
return false return false
proc lineStart(lex: YamlLexer): bool = proc lineStart(lex: var Lexer): bool =
debug("lex: lineStart") debug("lex: lineStart")
return case lex.startLine() return case lex.startLine()
of lsDirectivesEndMarker: lex.lineDirEnd() of lsDirectivesEndMarker: lex.lineDirEnd()
@ -869,7 +865,7 @@ proc lineStart(lex: YamlLexer): bool =
of lsStreamEnd: lex.state = streamEnd; false of lsStreamEnd: lex.state = streamEnd; false
of lsContent: lex.lineIndentation() of lsContent: lex.lineIndentation()
proc flowLineStart(lex: YamlLexer): bool = proc flowLineStart(lex: var Lexer): bool =
var indent: int var indent: int
case lex.c case lex.c
of '-': of '-':
@ -889,14 +885,14 @@ proc flowLineStart(lex: YamlLexer): bool =
lex.state = insideLine lex.state = insideLine
return false return false
proc flowLineIndentation(lex: var Lexer): bool =
  ## Lexer state that checks the indentation of a line start.
  ##
  ## Raises a lexer error when the current column is smaller than
  ## ``lex.indentation``. Otherwise switches to ``insideLine``.
  ## Returns ``false`` since no token is produced here.
  let column = lex.columnNumber()
  if column < lex.indentation:
    raise lex.generateError("Too few indentation spaces (must surpass surrounding block level)")
  lex.state = insideLine
  result = false
proc checkIndicatorChar(lex: YamlLexer, kind: LexerToken) = proc checkIndicatorChar(lex: var Lexer, kind: Token) =
if lex.Safe(): if lex.isPlainSafe():
lex.readPlainScalar() lex.readPlainScalar()
else: else:
lex.startToken() lex.startToken()
@ -905,7 +901,7 @@ proc checkIndicatorChar(lex: YamlLexer, kind: LexerToken) =
lex.cur = kind lex.cur = kind
lex.state = beforeIndentationSettingToken lex.state = beforeIndentationSettingToken
proc enterFlowCollection(lex: YamlLexer, kind: LexerToken) = proc enterFlowCollection(lex: var Lexer, kind: Token) =
lex.startToken() lex.startToken()
if lex.flowDepth == 0: if lex.flowDepth == 0:
lex.jsonEnablingState = afterJsonEnablingToken lex.jsonEnablingState = afterJsonEnablingToken
@ -917,7 +913,7 @@ proc enterFlowCollection(lex: YamlLexer, kind: LexerToken) =
lex.endToken() lex.endToken()
lex.cur = kind lex.cur = kind
proc leaveFlowCollection(lex: YamlLexer, kind: LexerToken) = proc leaveFlowCollection(lex: var Lexer, kind: Token) =
lex.startToken() lex.startToken()
if lex.flowDepth == 0: if lex.flowDepth == 0:
raise lex.generateError("No flow collection to leave!") raise lex.generateError("No flow collection to leave!")
@ -930,13 +926,13 @@ proc leaveFlowCollection(lex: YamlLexer, kind: LexerToken) =
lex.endToken() lex.endToken()
lex.cur = kind lex.cur = kind
proc readNamespace(lex: YamlLexer) = proc readNamespace(lex: var Lexer) =
lex.startToken() lex.startToken()
lex.advance() lex.advance()
if lex.c == '<': if lex.c == '<':
lex.readURI() lex.readURI()
lex.endToken() lex.endToken()
lex.cur = ltVerbatimTag lex.cur = Token.VerbatimTag
else: else:
var handleEnd = lex.tokenStart var handleEnd = lex.tokenStart
while true: while true:
@ -956,10 +952,10 @@ proc readNamespace(lex: YamlLexer) =
raise lex.generateError("Illegal character in tag handle: " & escape("" & lex.c)) raise lex.generateError("Illegal character in tag handle: " & escape("" & lex.c))
lex.advance() lex.advance()
lex.endToken() lex.endToken()
lex.cur = ltTagHandle lex.cur = Token.TagHandle
lex.state = atSuffix lex.state = atSuffix
proc readAnchorName(lex: YamlLexer) = proc readAnchorName(lex: var Lexer) =
lex.startToken() lex.startToken()
while true: while true:
lex.advance() lex.advance()
@ -970,17 +966,17 @@ proc readAnchorName(lex: YamlLexer) =
raise lex.generateError("Anchor name must not be empty") raise lex.generateError("Anchor name must not be empty")
lex.state = afterToken lex.state = afterToken
proc insideLine(lex: YamlLexer): bool = proc insideLine(lex: var Lexer): bool =
case lex.c case lex.c
of ':': of ':':
lex.checkIndicatorChar(ltMapValueInd) lex.checkIndicatorChar(Token.MapValueInd)
if lex.cur == ltMapValueInd and lex.propertyIndentation != -1: if lex.cur == Token.MapValueInd and lex.propertyIndentation != -1:
lex.indentation = lex.propertyIndentation lex.indentation = lex.propertyIndentation
lex.propertyIndentation = -1 lex.propertyIndentation = -1
of '?': of '?':
lex.checkIndicatorChar(ltMapKeyInd) lex.checkIndicatorChar(Token.MapKeyInd)
of '-': of '-':
lex.checkIndicatorChar(ltSeqItemInd) lex.checkIndicatorChar(Token.SeqItemInd)
of commentOrLineEnd: of commentOrLineEnd:
lex.endLine() lex.endLine()
return false return false
@ -996,36 +992,36 @@ proc insideLine(lex: YamlLexer): bool =
else: else:
lex.readBlockScalar() lex.readBlockScalar()
of '{': of '{':
lex.enterFlowCollection(ltMapStart) lex.enterFlowCollection(Token.MapStart)
of '}': of '}':
lex.leaveFlowCollection(ltMapEnd) lex.leaveFlowCollection(Token.MapEnd)
of '[': of '[':
lex.enterFlowCollection(ltSeqStart) lex.enterFlowCollection(Token.SeqStart)
of ']': of ']':
lex.leaveFlowCollection(ltSeqEnd) lex.leaveFlowCollection(Token.SeqEnd)
of ',': of ',':
lex.startToken() lex.startToken()
lex.advance() lex.advance()
lex.endToken() lex.endToken()
lex.cur = ltSeqSep lex.cur = Token.SeqSep
lex.state = afterToken lex.state = afterToken
of '!': of '!':
lex.readNamespace() lex.readNamespace()
of '&': of '&':
lex.readAnchorName() lex.readAnchorName()
lex.endToken() lex.endToken()
lex.cur = ltAnchor lex.cur = Token.Anchor
of '*': of '*':
lex.readAnchorName() lex.readAnchorName()
lex.endToken() lex.endToken()
lex.cur = ltAlias lex.cur = Token.Alias
of '@', '`': of '@', '`':
raise lex.generateError("Reserved character may not start any token") raise lex.generateError("Reserved character may not start any token")
else: else:
lex.readPlainScalar() lex.readPlainScalar()
return true return true
proc indentationSettingToken(lex: YamlLexer): bool = proc indentationSettingToken(lex: var Lexer): bool =
let cachedIntentation = lex.columnNumber() let cachedIntentation = lex.columnNumber()
result = lex.insideLine() result = lex.insideLine()
if result and lex.flowDepth > 0: if result and lex.flowDepth > 0:
@ -1034,7 +1030,7 @@ proc indentationSettingToken(lex: YamlLexer): bool =
else: else:
lex.indentation = cachedIntentation lex.indentation = cachedIntentation
proc afterToken(lex: YamlLexer): bool = proc afterToken(lex: var Lexer): bool =
while lex.c == ' ': lex.advance() while lex.c == ' ': lex.advance()
if lex.c in commentOrLineEnd: if lex.c in commentOrLineEnd:
lex.endLine() lex.endLine()
@ -1042,13 +1038,13 @@ proc afterToken(lex: YamlLexer): bool =
lex.state = insideLine lex.state = insideLine
return false return false
proc beforeIndentationSettingToken(lex: var Lexer): bool =
  ## Lexer state that runs ``afterToken`` and discards its result.
  ##
  ## If ``afterToken`` left the lexer in ``insideLine``, the state is replaced
  ## by ``indentationSettingToken``; any other state is kept as is.
  ## Returns ``false`` since no token is produced here.
  discard afterToken(lex)
  let continuesOnLine = lex.state == insideLine
  if continuesOnLine:
    lex.state = indentationSettingToken
  result = false
proc afterJsonEnablingToken(lex: YamlLexer): bool = proc afterJsonEnablingToken(lex: var Lexer): bool =
while lex.c == ' ': lex.advance() while lex.c == ' ': lex.advance()
while true: while true:
case lex.c case lex.c
@ -1056,7 +1052,7 @@ proc afterJsonEnablingToken(lex: YamlLexer): bool =
lex.startToken() lex.startToken()
lex.advance() lex.advance()
lex.endToken() lex.endToken()
lex.cur = ltMapValueInd lex.cur = Token.MapValueInd
lex.state = afterToken lex.state = afterToken
of '#', '\l', '\c': of '#', '\l', '\c':
lex.endLine() lex.endLine()
@ -1068,43 +1064,43 @@ proc afterJsonEnablingToken(lex: YamlLexer): bool =
lex.state = insideLine lex.state = insideLine
return false return false
proc lineIndentation(lex: var Lexer): bool =
  ## Lexer state that emits ``Token.Indentation`` for the current line.
  ##
  ## The token's start position is set to column 1 of the source's current
  ## line. The following state is ``indentationSettingToken``.
  ## Always returns ``true`` (a token was produced).
  lex.curStartPos.column = 1
  lex.curStartPos.line = lex.source.lineNumber
  lex.endToken()
  lex.cur = Token.Indentation
  lex.state = indentationSettingToken
  result = true
proc lineDirEnd(lex: var Lexer): bool =
  ## Lexer state that emits ``Token.DirectivesEnd``.
  ##
  ## The token's start position is set to column 1 of the source's current
  ## line. Both ``indentation`` and ``propertyIndentation`` are reset to
  ## ``-1``. Always returns ``true`` (a token was produced).
  lex.curStartPos.column = 1
  lex.curStartPos.line = lex.source.lineNumber
  lex.endToken()
  lex.cur = Token.DirectivesEnd
  lex.propertyIndentation = -1
  lex.indentation = -1
  result = true
proc lineDocEnd(lex: var Lexer): bool =
  ## Lexer state that emits ``Token.DocumentEnd``.
  ##
  ## The token's start position is set to column 1 of the source's current
  ## line. The following state is ``expectLineEnd``, and subsequent line
  ## starts are handled by ``outsideDoc``. Always returns ``true`` (a token
  ## was produced).
  lex.curStartPos.column = 1
  lex.curStartPos.line = lex.source.lineNumber
  lex.endToken()
  lex.cur = Token.DocumentEnd
  lex.lineStartState = outsideDoc
  lex.state = expectLineEnd
  result = true
proc atSuffix(lex: var Lexer): bool =
  ## Lexer state that reads a run of ``suffixChars`` and emits it as
  ## ``Token.Suffix``.
  ##
  ## The consumed text (``fullLexeme``) is stored in ``lex.evaluated``.
  ## The following state is ``afterToken``. Always returns ``true`` (a token
  ## was produced).
  lex.startToken()
  while lex.c in suffixChars:
    lex.advance()
  lex.evaluated = fullLexeme(lex)
  lex.endToken()
  lex.cur = Token.Suffix
  lex.state = afterToken
  result = true
proc streamEnd(lex: var Lexer): bool =
  ## Lexer state that emits ``Token.StreamEnd`` as an empty token: it is
  ## started and ended without consuming input. The state is left unchanged,
  ## so further calls yield the same token. Always returns ``true``.
  startToken(lex)
  endToken(lex)
  lex.cur = Token.StreamEnd
  result = true

View File

@ -12,66 +12,12 @@
## operate. It is not named ``streams`` to not confuse it with the module in the
## stdlib with that name.
import hashes import data
import private/internal, taglib
when defined(nimNoNil): when defined(nimNoNil):
{.experimental: "notnil".} {.experimental: "notnil".}
when defined(yamlScalarRepInd):
type ScalarRepresentationIndicator* = enum
srPlain, srSingleQuoted, srDoubleQuoted, srLiteral, srFolded
type type
AnchorId* = distinct int ## \
## An ``AnchorId`` identifies an anchor in the current document. It
## becomes invalid as soon as the current document scope is invalidated
## (for example, because the parser yielded a ``yamlEndDocument``
## event). ``AnchorId`` s exists because of efficiency, much like
## ``TagId`` s. The actual anchor name is a presentation detail and
## cannot be queried by the user.
YamlStreamEventKind* = enum
## Kinds of YAML events that may occur in an ``YamlStream``. Event kinds
## are discussed in `YamlStreamEvent <#YamlStreamEvent>`_.
yamlStartDoc, yamlEndDoc, yamlStartMap, yamlEndMap,
yamlStartSeq, yamlEndSeq, yamlScalar, yamlAlias
YamlStreamEvent* = object
## An element from a `YamlStream <#YamlStream>`_. Events that start an
## object (``yamlStartMap``, ``yamlStartSeq``, ``yamlScalar``) have
## an optional anchor and a tag associated with them. The anchor will be
## set to ``yAnchorNone`` if it doesn't exist.
##
## A non-existing tag in the YAML character stream will be resolved to
## the non-specific tags ``?`` or ``!`` according to the YAML
## specification. These are by convention mapped to the ``TagId`` s
## ``yTagQuestionMark`` and ``yTagExclamationMark`` respectively.
## Mapping is done by a `TagLibrary <#TagLibrary>`_.
case kind*: YamlStreamEventKind
of yamlStartMap:
mapAnchor* : AnchorId
mapTag* : TagId
of yamlStartSeq:
seqAnchor* : AnchorId
seqTag* : TagId
of yamlScalar:
scalarAnchor* : AnchorId
scalarTag* : TagId
scalarContent*: string # may not be nil (but empty)
when defined(yamlScalarRepInd):
scalarRep* : ScalarRepresentationIndicator
of yamlStartDoc:
when defined(yamlScalarRepInd):
explicitDirectivesEnd*: bool
else: discard
of yamlEndDoc:
when defined(yamlScalarRepInd):
explicitDocumentEnd*: bool
of yamlEndMap, yamlEndSeq: discard
of yamlAlias:
aliasTarget* : AnchorId
YamlStream* = ref object of RootObj ## \ YamlStream* = ref object of RootObj ## \
## A ``YamlStream`` is an iterator-like object that yields a ## A ``YamlStream`` is an iterator-like object that yields a
## well-formed stream of ``YamlStreamEvents``. Well-formed means that ## well-formed stream of ``YamlStreamEvents``. Well-formed means that
@ -85,27 +31,18 @@ type
## and is not required to check for it. The procs in this module will ## and is not required to check for it. The procs in this module will
## always yield a well-formed ``YamlStream`` and expect it to be ## always yield a well-formed ``YamlStream`` and expect it to be
## well-formed if they take it as input parameter. ## well-formed if they take it as input parameter.
nextImpl*: proc(s: YamlStream, e: var YamlStreamEvent): bool nextImpl*: proc(s: YamlStream, e: var Event): bool
lastTokenContextImpl*: lastTokenContextImpl*:
proc(s: YamlStream, line, column: var int, proc(s: YamlStream, line, column: var int,
lineContent: var string): bool {.raises: [].} lineContent: var string): bool {.raises: [].}
isFinished*: bool
peeked: bool peeked: bool
cached: YamlStreamEvent cached: Event
YamlStreamError* = object of Exception YamlStreamError* = object of ValueError
## Exception that may be raised by a ``YamlStream`` when the underlying ## Exception that may be raised by a ``YamlStream`` when the underlying
## backend raises an exception. The error that has occurred is ## backend raises an exception. The error that has occurred is
## available from ``parent``. ## available from ``parent``.
const
yAnchorNone*: AnchorId = (-1).AnchorId ## \
## yielded when no anchor was defined for a YAML node
proc `==`*(left, right: AnchorId): bool {.borrow.}
proc `$`*(id: AnchorId): string {.borrow.}
proc hash*(id: AnchorId): Hash {.borrow.}
proc noLastContext(s: YamlStream, line, column: var int, proc noLastContext(s: YamlStream, line, column: var int,
lineContent: var string): bool {.raises: [].} = lineContent: var string): bool {.raises: [].} =
(line, column, lineContent) = (-1, -1, "") (line, column, lineContent) = (-1, -1, "")
@ -117,111 +54,73 @@ proc basicInit*(s: YamlStream, lastTokenContextImpl:
## initialize basic values of the YamlStream. Call this in your constructor ## initialize basic values of the YamlStream. Call this in your constructor
## if you subclass YamlStream. ## if you subclass YamlStream.
s.peeked = false s.peeked = false
s.isFinished = false
s.lastTokenContextImpl = lastTokenContextImpl s.lastTokenContextImpl = lastTokenContextImpl
when not defined(JS):
  type IteratorYamlStream = ref object of YamlStream
    # Closure iterator the events are pulled from.
    backend: iterator(): Event

  proc initYamlStream*(backend: iterator(): Event): YamlStream
      {.raises: [].} =
    ## Creates a new ``YamlStream`` that uses the given iterator as backend.
    ##
    ## Every request for an event resumes ``backend`` once and hands out the
    ## value it yields; the stream itself does not check whether the iterator
    ## has finished.
    let stream = new(IteratorYamlStream)
    stream.basicInit()
    stream.backend = backend
    stream.nextImpl = proc(s: YamlStream, e: var Event): bool =
      e = IteratorYamlStream(s).backend()
      # An event is always reported as available.
      true
    result = stream
type
  BufferYamlStream* = ref object of YamlStream
    ## ``YamlStream`` that replays events previously stored with ``put``.
    pos: int          # index of the next event to hand out
    buf: seq[Event]   # events in the order they were put

proc newBufferYamlStream*(): BufferYamlStream not nil =
  ## Creates an empty ``BufferYamlStream``.
  ##
  ## Events are handed out in insertion order. No bounds check is done when
  ## reading, so requesting more events than were ``put`` indexes past the
  ## end of the buffer.
  result = cast[BufferYamlStream not nil](new(BufferYamlStream))
  result.basicInit()
  result.pos = 0
  result.buf = @[]
  result.nextImpl = proc(s: YamlStream, e: var Event): bool =
    let buffered = BufferYamlStream(s)
    e = buffered.buf[buffered.pos]
    buffered.pos += 1
    true

proc put*(bys: BufferYamlStream, e: Event) {.raises: [].} =
  ## Appends ``e`` to the end of the buffered events of ``bys``.
  add(bys.buf, e)
proc next*(s: YamlStream): Event {.raises: [YamlStreamError].} =
  ## Get the next item of the stream and advance the stream.
  ##
  ## If an item has been peeked (or set via ``peek=``), that cached item is
  ## returned and the cache is cleared. Otherwise the backend is queried
  ## until it reports an event.
  ##
  ## If the backend raises a ``YamlStreamError``, it is passed on unchanged.
  ## Any other exception is encapsulated into a new ``YamlStreamError`` whose
  ## ``parent`` is the original exception.
  if s.peeked:
    s.peeked = false
    return move(s.cached)
  try:
    while true:
      if s.nextImpl(s, result): break
  except YamlStreamError as streamErr:
    # Already the documented error type. Wrapping it again would put another
    # YamlStreamError into ``parent`` and hide the real cause from callers.
    raise streamErr
  except Exception:
    # ``nextImpl`` carries no ``raises`` annotation, so everything else has to
    # be caught here to satisfy this proc's ``raises`` list.
    let cur = getCurrentException()
    var e = newException(YamlStreamError, cur.msg)
    e.parent = cur
    raise e
proc peek*(s: YamlStream): YamlStreamEvent {.raises: [YamlStreamError].} = proc peek*(s: YamlStream): Event {.raises: [YamlStreamError].} =
## Get the next item of the stream without advancing the stream. ## Get the next item of the stream without advancing the stream.
## Requires ``finished(s) == true``. Handles exceptions of the backend like ## Requires ``finished(s) == true``. Handles exceptions of the backend like
## ``next()``. ## ``next()``.
if not s.peeked: if not s.peeked:
shallowCopy(s.cached, s.next()) s.cached = s.next()
s.peeked = true s.peeked = true
shallowCopy(result, s.cached) shallowCopy(result, s.cached)
proc `peek=`*(s: YamlStream, value: YamlStreamEvent) {.raises: [].} = proc `peek=`*(s: YamlStream, value: Event) {.raises: [].} =
## Set the next item of the stream. Will replace a previously peeked item, ## Set the next item of the stream. Will replace a previously peeked item,
## if one exists. ## if one exists.
s.cached = value s.cached = value
s.peeked = true s.peeked = true
proc finished*(s: YamlStream): bool {.raises: [YamlStreamError].} =
## ``true`` if no more items are available in the stream. Handles exceptions
## of the backend like ``next()``.
if s.peeked: result = false
else:
try:
while true:
if s.isFinished: return true
if s.nextImpl(s, s.cached):
s.peeked = true
return false
except YamlStreamError:
let cur = getCurrentException()
var e = newException(YamlStreamError, cur.msg)
e.parent = cur.parent
raise e
except Exception:
let cur = getCurrentException()
var e = newException(YamlStreamError, cur.msg)
e.parent = cur
raise e
proc getLastTokenContext*(s: YamlStream, line, column: var int, proc getLastTokenContext*(s: YamlStream, line, column: var int,
lineContent: var string): bool = lineContent: var string): bool =
## ``true`` if source context information is available about the last returned ## ``true`` if source context information is available about the last returned
@ -229,139 +128,17 @@ proc getLastTokenContext*(s: YamlStream, line, column: var int,
## line content where the last token has been read from. ## line content where the last token has been read from.
result = s.lastTokenContextImpl(s, line, column, lineContent) result = s.lastTokenContextImpl(s, line, column, lineContent)
iterator items*(s: YamlStream): YamlStreamEvent iterator items*(s: YamlStream): Event
{.raises: [YamlStreamError].} = {.raises: [YamlStreamError].} =
## Iterate over all items of the stream. You may not use ``peek()`` on the ## Iterate over all items of the stream. You may not use ``peek()`` on the
## stream while iterating. ## stream while iterating.
while not s.finished(): yield s.next() while true:
let e = s.next()
var last = e.kind == yamlEndStream
yield e
if last: break
iterator mitems*(bys: BufferYamlStream): var YamlStreamEvent {.raises: [].} = iterator mitems*(bys: BufferYamlStream): var Event {.raises: [].} =
## Iterate over all items of the stream. You may not use ``peek()`` on the ## Iterate over all items of the stream. You may not use ``peek()`` on the
## stream while iterating. ## stream while iterating.
for e in bys.buf.mitems(): yield e for e in bys.buf.mitems(): yield e
proc `==`*(left: YamlStreamEvent, right: YamlStreamEvent): bool {.raises: [].} =
## compares all existing fields of the given items
if left.kind != right.kind: return false
case left.kind
of yamlStartDoc, yamlEndDoc, yamlEndMap, yamlEndSeq: result = true
of yamlStartMap:
result = left.mapAnchor == right.mapAnchor and left.mapTag == right.mapTag
of yamlStartSeq:
result = left.seqAnchor == right.seqAnchor and left.seqTag == right.seqTag
of yamlScalar:
result = left.scalarAnchor == right.scalarAnchor and
left.scalarTag == right.scalarTag and
left.scalarContent == right.scalarContent
of yamlAlias: result = left.aliasTarget == right.aliasTarget
proc renderAttrs(tag: TagId, anchor: AnchorId, isPlain: bool = true): string =
result = ""
if anchor != yAnchorNone: result &= " &" & $anchor
case tag
of yTagQuestionmark: discard
of yTagExclamationmark:
when defined(yamlScalarRepInd):
if isPlain: result &= " <!>"
else:
result &= " <" & $tag & ">"
proc `$`*(event: YamlStreamEvent): string {.raises: [].} =
## outputs a human-readable string describing the given event.
## This string is compatible to the format used in the yaml test suite.
case event.kind
of yamlEndMap: result = "-MAP"
of yamlEndSeq: result = "-SEQ"
of yamlStartDoc:
result = "+DOC"
when defined(yamlScalarRepInd):
if event.explicitDirectivesEnd: result &= " ---"
of yamlEndDoc:
result = "-DOC"
when defined(yamlScalarRepInd):
if event.explicitDocumentEnd: result &= " ..."
of yamlStartMap: result = "+MAP" & renderAttrs(event.mapTag, event.mapAnchor)
of yamlStartSeq: result = "+SEQ" & renderAttrs(event.seqTag, event.seqAnchor)
of yamlScalar:
when defined(yamlScalarRepInd):
result = "=VAL" & renderAttrs(event.scalarTag, event.scalarAnchor,
event.scalarRep == srPlain)
case event.scalarRep
of srPlain: result &= " :"
of srSingleQuoted: result &= " \'"
of srDoubleQuoted: result &= " \""
of srLiteral: result &= " |"
of srFolded: result &= " >"
else:
result = "=VAL" & renderAttrs(event.scalarTag, event.scalarAnchor,
false)
if event.scalarTag == yTagExclamationmark: result &= " \""
else: result &= " :"
result &= yamlTestSuiteEscape(event.scalarContent)
of yamlAlias: result = "=ALI *" & $event.aliasTarget
proc tag*(event: YamlStreamEvent): TagId {.raises: [FieldError].} =
## returns the tag of the given event
case event.kind
of yamlStartMap: result = event.mapTag
of yamlStartSeq: result = event.seqTag
of yamlScalar: result = event.scalarTag
else: raise newException(FieldError, "Event " & $event.kind & " has no tag")
when defined(yamlScalarRepInd):
proc startDocEvent*(explicit: bool = false): YamlStreamEvent
{.inline, raises: [].} =
## creates a new event that marks the start of a YAML document
result = YamlStreamEvent(kind: yamlStartDoc,
explicitDirectivesEnd: explicit)
proc endDocEvent*(explicit: bool = false): YamlStreamEvent
{.inline, raises: [].} =
## creates a new event that marks the end of a YAML document
result = YamlStreamEvent(kind: yamlEndDoc, explicitDocumentEnd: explicit)
else:
proc startDocEvent*(): YamlStreamEvent {.inline, raises: [].} =
## creates a new event that marks the start of a YAML document
result = YamlStreamEvent(kind: yamlStartDoc)
proc endDocEvent*(): YamlStreamEvent {.inline, raises: [].} =
## creates a new event that marks the end of a YAML document
result = YamlStreamEvent(kind: yamlEndDoc)
proc startMapEvent*(tag: TagId = yTagQuestionMark,
anchor: AnchorId = yAnchorNone): YamlStreamEvent {.inline, raises: [].} =
## creates a new event that marks the start of a YAML mapping
result = YamlStreamEvent(kind: yamlStartMap, mapTag: tag, mapAnchor: anchor)
proc endMapEvent*(): YamlStreamEvent {.inline, raises: [].} =
## creates a new event that marks the end of a YAML mapping
result = YamlStreamEvent(kind: yamlEndMap)
proc startSeqEvent*(tag: TagId = yTagQuestionMark,
anchor: AnchorId = yAnchorNone): YamlStreamEvent {.inline, raises: [].} =
## creates a new event that marks the beginning of a YAML sequence
result = YamlStreamEvent(kind: yamlStartSeq, seqTag: tag, seqAnchor: anchor)
proc endSeqEvent*(): YamlStreamEvent {.inline, raises: [].} =
## creates a new event that marks the end of a YAML sequence
result = YamlStreamEvent(kind: yamlEndSeq)
when defined(yamlScalarRepInd):
proc scalarEvent*(content: string = "", tag: TagId = yTagQuestionMark,
anchor: AnchorId = yAnchorNone,
scalarRep: ScalarRepresentationIndicator = srPlain):
YamlStreamEvent {.inline, raises: [].} =
## creates a new event that represents a YAML scalar
result = YamlStreamEvent(kind: yamlScalar, scalarTag: tag,
scalarAnchor: anchor, scalarContent: content,
scalarRep: scalarRep)
else:
proc scalarEvent*(content: string = "", tag: TagId = yTagQuestionMark,
anchor: AnchorId = yAnchorNone): YamlStreamEvent {.inline, raises: [].} =
## creates a new event that represents a YAML scalar
result = YamlStreamEvent(kind: yamlScalar, scalarTag: tag,
scalarAnchor: anchor, scalarContent: content)
proc aliasEvent*(anchor: AnchorId): YamlStreamEvent {.inline, raises: [].} =
## creates a new event that represents a YAML alias
result = YamlStreamEvent(kind: yamlAlias, aliasTarget: anchor)

View File

@ -13,20 +13,9 @@
## the serialization API. ## the serialization API.
import tables, macros, hashes, strutils import tables, macros, hashes, strutils
import data
type type
TagId* = distinct int ## \
## A ``TagId`` identifies a tag URI, like for example
## ``"tag:yaml.org,2002:str"``. The URI corresponding to a ``TagId`` can
## be queried from the `TagLibrary <#TagLibrary>`_ which was
## used to create this ``TagId``; e.g. when you parse a YAML character
## stream, the ``TagLibrary`` of the parser is the one which generates
## the resulting ``TagId`` s.
##
## URI strings are mapped to ``TagId`` s for efficiency reasons (you
## do not need to compare strings every time) and to be able to
## discover unknown tag URIs early in the parsing process.
TagLibrary* = ref object TagLibrary* = ref object
## A ``TagLibrary`` maps tag URIs to ``TagId`` s. ## A ``TagLibrary`` maps tag URIs to ``TagId`` s.
## ##
@ -43,87 +32,6 @@ type
nextCustomTagId*: TagId nextCustomTagId*: TagId
tagHandles: Table[string, string] tagHandles: Table[string, string]
const
# failsafe schema
yTagExclamationMark*: TagId = 0.TagId ## ``!`` non-specific tag
yTagQuestionMark* : TagId = 1.TagId ## ``?`` non-specific tag
yTagString* : TagId = 2.TagId ## \
## `!!str <http://yaml.org/type/str.html >`_ tag
yTagSequence* : TagId = 3.TagId ## \
## `!!seq <http://yaml.org/type/seq.html>`_ tag
yTagMapping* : TagId = 4.TagId ## \
## `!!map <http://yaml.org/type/map.html>`_ tag
# json & core schema
yTagNull* : TagId = 5.TagId ## \
## `!!null <http://yaml.org/type/null.html>`_ tag
yTagBoolean* : TagId = 6.TagId ## \
## `!!bool <http://yaml.org/type/bool.html>`_ tag
yTagInteger* : TagId = 7.TagId ## \
## `!!int <http://yaml.org/type/int.html>`_ tag
yTagFloat* : TagId = 8.TagId ## \
## `!!float <http://yaml.org/type/float.html>`_ tag
# other language-independent YAML types (from http://yaml.org/type/ )
yTagOrderedMap* : TagId = 9.TagId ## \
## `!!omap <http://yaml.org/type/omap.html>`_ tag
yTagPairs* : TagId = 10.TagId ## \
## `!!pairs <http://yaml.org/type/pairs.html>`_ tag
yTagSet* : TagId = 11.TagId ## \
## `!!set <http://yaml.org/type/set.html>`_ tag
yTagBinary* : TagId = 12.TagId ## \
## `!!binary <http://yaml.org/type/binary.html>`_ tag
yTagMerge* : TagId = 13.TagId ## \
## `!!merge <http://yaml.org/type/merge.html>`_ tag
yTagTimestamp* : TagId = 14.TagId ## \
## `!!timestamp <http://yaml.org/type/timestamp.html>`_ tag
yTagValue* : TagId = 15.TagId ## \
## `!!value <http://yaml.org/type/value.html>`_ tag
yTagYaml* : TagId = 16.TagId ## \
## `!!yaml <http://yaml.org/type/yaml.html>`_ tag
yTagNimField* : TagId = 100.TagId ## \
## This tag is used in serialization for the name of a field of an
## object. It may contain any string scalar that is a valid Nim symbol.
yFirstStaticTagId* : TagId = 1000.TagId ## \
## The first ``TagId`` assigned by the ``setTagId`` templates.
yFirstCustomTagId* : TagId = 10000.TagId ## \
## The first ``TagId`` which should be assigned to an URI that does not
## exist in the ``YamlTagLibrary`` which is used for parsing.
yamlTagRepositoryPrefix* = "tag:yaml.org,2002:"
nimyamlTagRepositoryPrefix* = "tag:nimyaml.org,2016:"
proc `==`*(left, right: TagId): bool {.borrow.}
proc hash*(id: TagId): Hash {.borrow.}
proc `$`*(id: TagId): string {.raises: [].} =
case id
of yTagQuestionMark: "?"
of yTagExclamationMark: "!"
of yTagString: "!!str"
of yTagSequence: "!!seq"
of yTagMapping: "!!map"
of yTagNull: "!!null"
of yTagBoolean: "!!bool"
of yTagInteger: "!!int"
of yTagFloat: "!!float"
of yTagOrderedMap: "!!omap"
of yTagPairs: "!!pairs"
of yTagSet: "!!set"
of yTagBinary: "!!binary"
of yTagMerge: "!!merge"
of yTagTimestamp: "!!timestamp"
of yTagValue: "!!value"
of yTagYaml: "!!yaml"
of yTagNimField: "!nim:field"
else: "<" & $int(id) & ">"
proc initTagLibrary*(): TagLibrary {.raises: [].} = proc initTagLibrary*(): TagLibrary {.raises: [].} =
## initializes the ``tags`` table and sets ``nextCustomTagId`` to ## initializes the ``tags`` table and sets ``nextCustomTagId`` to
## ``yFirstCustomTagId``. ## ``yFirstCustomTagId``.
@ -301,6 +209,12 @@ proc searchHandle*(tagLib: TagLibrary, tag: string):
result.len = key.len result.len = key.len
result.handle = value result.handle = value
proc resolve*(tagLib: TagLibrary, handle: string): string {.raises: [].} =
## try to resolve the given tag handle.
## return the registered URI if the tag handle is found.
## if the handle is unknown, return the empty string.
return tagLib.tagHandles.getOrDefault(handle, "")
iterator handles*(tagLib: TagLibrary): tuple[prefix, handle: string] = iterator handles*(tagLib: TagLibrary): tuple[prefix, handle: string] =
## iterate over registered tag handles that may be used as shortcuts ## iterate over registered tag handles that may be used as shortcuts
## (e.g. ``!n!`` for ``tag:nimyaml.org,2016:``) ## (e.g. ``!n!`` for ``tag:nimyaml.org,2016:``)