diff --git a/test/yaml-test-suite b/test/yaml-test-suite
index e11b848..fa266d1 160000
--- a/test/yaml-test-suite
+++ b/test/yaml-test-suite
@@ -1 +1 @@
-Subproject commit e11b84891a3cd7aa4460f9d09c99283c974c02c6
+Subproject commit fa266d172ef7b2cdd35219b8e677fa5e0ae86269
diff --git a/yaml/data.nim b/yaml/data.nim
new file mode 100644
index 0000000..2e8d3f1
--- /dev/null
+++ b/yaml/data.nim
@@ -0,0 +1,306 @@
+import hashes
+import private/internal
+
+type
+  Anchor* = distinct string ## \
+    ## An ``Anchor`` identifies an anchor in the current document.
+    ## It is not necessarily unique and references to an anchor must be
+    ## resolved immediately on occurrence.
+    ##
+    ## Anchor provides the operator `$` for converting to string, `==` for
+    ## comparison, and `hash` for usage in a hashmap.
+
+  TagId* = distinct int ## \
+    ## A ``TagId`` identifies a tag URI, like for example
+    ## ``"tag:yaml.org,2002:str"``. The URI corresponding to a ``TagId`` can
+    ## be queried from the `TagLibrary <#TagLibrary>`_ which was
+    ## used to create this ``TagId``; e.g. when you parse a YAML character
+    ## stream, the ``TagLibrary`` of the parser is the one which generates
+    ## the resulting ``TagId`` s.
+    ##
+    ## URI strings are mapped to ``TagId`` s for efficiency reasons (you
+    ## do not need to compare strings every time) and to be able to
+    ## discover unknown tag URIs early in the parsing process.
+
+  ScalarStyle* = enum
+    ## Original style of the scalar (for input),
+    ## or desired style of the scalar (for output).
+    ssAny, ssPlain, ssSingleQuoted, ssDoubleQuoted, ssLiteral, ssFolded
+
+  CollectionStyle* = enum ## Style of a mapping or sequence start event.
+    csBlock, csFlow
+
+  EventKind* = enum
+    ## Kinds of YAML events that may occur in a ``YamlStream``. Event kinds
+    ## are discussed in `Event <#Event>`_.
+    yamlStartStream, yamlEndStream,
+    yamlStartDoc, yamlEndDoc, yamlStartMap, yamlEndMap,
+    yamlStartSeq, yamlEndSeq, yamlScalar, yamlAlias
+
+  Event* = object
+    ## An element from a `YamlStream <#YamlStream>`_. Events that start an
+    ## object (``yamlStartMap``, ``yamlStartSeq``, ``yamlScalar``) have
+    ## an optional anchor and a tag associated with them. The anchor will be
+    ## set to ``yAnchorNone`` if it doesn't exist.
+    ##
+    ## A missing tag in the YAML character stream generates
+    ## the non-specific tags ``?`` or ``!`` according to the YAML
+    ## specification. These are by convention mapped to the ``TagId`` s
+    ## ``yTagQuestionMark`` and ``yTagExclamationMark`` respectively.
+    ## Mapping is done by a `TagLibrary <#TagLibrary>`_.
+    ##
+    ## ``startPos`` and ``endPos`` are only relevant for events from an input
+    ## stream - they are generally ignored if used with events that generate
+    ## output.
+    startPos*, endPos*: Mark
+    case kind*: EventKind
+    of yamlStartStream, yamlEndStream: discard
+    of yamlStartMap:
+      mapProperties*: Properties
+      mapStyle*: CollectionStyle
+    of yamlStartSeq:
+      seqProperties*: Properties
+      seqStyle*: CollectionStyle
+    of yamlScalar:
+      scalarProperties*: Properties
+      scalarStyle* : ScalarStyle
+      scalarContent*: string
+    of yamlStartDoc:
+      explicitDirectivesEnd*: bool
+      version*: string
+    of yamlEndDoc:
+      explicitDocumentEnd*: bool
+    of yamlEndMap, yamlEndSeq: discard
+    of yamlAlias:
+      aliasTarget* : Anchor
+
+  Mark* = tuple[line, column: Positive] ## Line/column position; both are ``Positive``, so counting starts at 1.
+
+  Properties* = tuple[anchor: Anchor, tag: TagId] ## Anchor and tag attached to a map start, sequence start or scalar event.
+
+const
+  yAnchorNone*: Anchor = "".Anchor ## \
+    ## yielded when no anchor was defined for a YAML node
+
+  defaultMark: Mark = (1.Positive, 1.Positive) ## \
+    ## used for events that are not generated from input.
+
+  yTagExclamationMark*: TagId = 0.TagId ## ``!`` non-specific tag
+  yTagQuestionMark* : TagId = 1.TagId ## ``?`` non-specific tag
+
+  # failsafe schema
+
+  yTagString* : TagId = 2.TagId ## \
+    ## `!!str <http://yaml.org/type/str.html>`_ tag
+  yTagSequence* : TagId = 3.TagId ## \
+    ## `!!seq <http://yaml.org/type/seq.html>`_ tag
+  yTagMapping* : TagId = 4.TagId ## \
+    ## `!!map <http://yaml.org/type/map.html>`_ tag
+
+  # json & core schema
+
+  yTagNull* : TagId = 5.TagId ## \
+    ## `!!null <http://yaml.org/type/null.html>`_ tag
+  yTagBoolean* : TagId = 6.TagId ## \
+    ## `!!bool <http://yaml.org/type/bool.html>`_ tag
+  yTagInteger* : TagId = 7.TagId ## \
+    ## `!!int <http://yaml.org/type/int.html>`_ tag
+  yTagFloat* : TagId = 8.TagId ## \
+    ## `!!float <http://yaml.org/type/float.html>`_ tag
+
+  # other language-independent YAML types (from http://yaml.org/type/ )
+
+  yTagOrderedMap* : TagId = 9.TagId ## \
+    ## `!!omap <http://yaml.org/type/omap.html>`_ tag
+  yTagPairs* : TagId = 10.TagId ## \
+    ## `!!pairs <http://yaml.org/type/pairs.html>`_ tag
+  yTagSet* : TagId = 11.TagId ## \
+    ## `!!set <http://yaml.org/type/set.html>`_ tag
+  yTagBinary* : TagId = 12.TagId ## \
+    ## `!!binary <http://yaml.org/type/binary.html>`_ tag
+  yTagMerge* : TagId = 13.TagId ## \
+    ## `!!merge <http://yaml.org/type/merge.html>`_ tag
+  yTagTimestamp* : TagId = 14.TagId ## \
+    ## `!!timestamp <http://yaml.org/type/timestamp.html>`_ tag
+  yTagValue* : TagId = 15.TagId ## \
+    ## `!!value <http://yaml.org/type/value.html>`_ tag
+  yTagYaml* : TagId = 16.TagId ## \
+    ## `!!yaml <http://yaml.org/type/yaml.html>`_ tag
+
+  yTagNimField* : TagId = 100.TagId ## \
+    ## This tag is used in serialization for the name of a field of an
+    ## object. It may contain any string scalar that is a valid Nim symbol.
+
+  yFirstStaticTagId* : TagId = 1000.TagId ## \
+    ## The first ``TagId`` assigned by the ``setTagId`` templates.
+
+  yFirstCustomTagId* : TagId = 10000.TagId ## \
+    ## The first ``TagId`` which should be assigned to a URI that does not
+    ## exist in the ``TagLibrary`` which is used for parsing.
+
+  yamlTagRepositoryPrefix* = "tag:yaml.org,2002:"
+  nimyamlTagRepositoryPrefix* = "tag:nimyaml.org,2016:"
+
+proc properties*(event: Event): Properties =
+  ## returns the properties (anchor and tag) of the given event.
+  ## Raises ``FieldDefect`` if the event kind carries no properties.
+  case event.kind
+  of yamlStartMap: result = event.mapProperties
+  of yamlStartSeq: result = event.seqProperties
+  of yamlScalar: result = event.scalarProperties
+  else: raise newException(FieldDefect, "Event " & $event.kind & " has no properties")
+
+proc collectionStyle*(event: Event): CollectionStyle =
+  ## returns the style of the given collection start event.
+  ## Raises ``FieldDefect`` for any other event kind.
+  case event.kind
+  of yamlStartMap: result = event.mapStyle
+  of yamlStartSeq: result = event.seqStyle
+  else: raise (ref FieldDefect)(msg: "Event " & $event.kind & " has no collectionStyle")
+
+proc startDocEvent*(explicit: bool = false, startPos, endPos: Mark = defaultMark): Event
+    {.inline, raises: [].} =
+  ## creates a new event that marks the start of a YAML document
+  result = Event(startPos: startPos, endPos: endPos,
+                 kind: yamlStartDoc,
+                 explicitDirectivesEnd: explicit)
+
+proc endDocEvent*(explicit: bool = false, startPos, endPos: Mark = defaultMark): Event
+    {.inline, raises: [].} =
+  ## creates a new event that marks the end of a YAML document
+  result = Event(startPos: startPos, endPos: endPos,
+                 kind: yamlEndDoc, explicitDocumentEnd: explicit)
+
+proc startMapEvent*(style: CollectionStyle, props: Properties,
+                    startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
+  ## creates a new event that marks the start of a YAML mapping.
+  ## The positions default to ``defaultMark`` like in all other constructors.
+  result = Event(startPos: startPos, endPos: endPos,
+                 kind: yamlStartMap, mapProperties: props,
+                 mapStyle: style)
+
+proc startMapEvent*(style: CollectionStyle,
+                    tag: TagId = yTagQuestionMark,
+                    anchor: Anchor = yAnchorNone,
+                    startPos, endPos: Mark = defaultMark): Event {.inline.} =
+  ## overload taking tag and anchor separately instead of as ``Properties``
+  return startMapEvent(style, (anchor, tag), startPos, endPos)
+
+proc endMapEvent*(startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
+  ## creates a new event that marks the end of a YAML mapping
+  result = Event(startPos: startPos, endPos: endPos, kind: yamlEndMap)
+
+proc startSeqEvent*(style: CollectionStyle,
+                    props: Properties,
+                    startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
+  ## creates a new event that marks the beginning of a YAML sequence
+  result = Event(startPos: startPos, endPos: endPos,
+                 kind: yamlStartSeq, seqProperties: props,
+                 seqStyle: style)
+
+proc startSeqEvent*(style: CollectionStyle,
+                    tag: TagId = yTagQuestionMark,
+                    anchor: Anchor = yAnchorNone,
+                    startPos, endPos: Mark = defaultMark): Event {.inline.} =
+  ## overload taking tag and anchor separately instead of as ``Properties``
+  return startSeqEvent(style, (anchor, tag), startPos, endPos)
+
+proc endSeqEvent*(startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
+  ## creates a new event that marks the end of a YAML sequence
+  result = Event(startPos: startPos, endPos: endPos, kind: yamlEndSeq)
+
+proc scalarEvent*(content: string, props: Properties,
+                  style: ScalarStyle = ssAny,
+                  startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
+  ## creates a new event that represents a YAML scalar
+  result = Event(startPos: startPos, endPos: endPos,
+                 kind: yamlScalar, scalarProperties: props,
+                 scalarContent: content, scalarStyle: style)
+
+proc scalarEvent*(content: string = "", tag: TagId = yTagQuestionMark,
+                  anchor: Anchor = yAnchorNone,
+                  style: ScalarStyle = ssAny,
+                  startPos, endPos: Mark = defaultMark): Event {.inline.} =
+  ## overload taking tag and anchor separately instead of as ``Properties``
+  return scalarEvent(content, (anchor, tag), style, startPos, endPos)
+
+proc aliasEvent*(target: Anchor, startPos, endPos: Mark = defaultMark): Event {.inline, raises: [].} =
+  ## creates a new event that represents a YAML alias
+  result = Event(startPos: startPos, endPos: endPos, kind: yamlAlias, aliasTarget: target)
+
+proc `==`*(left, right: Anchor): bool {.borrow.}
+proc `$`*(id: Anchor): string {.borrow.}
+proc hash*(id: Anchor): Hash {.borrow.}
+
+proc `==`*(left, right: TagId): bool {.borrow.}
+proc hash*(id: TagId): Hash {.borrow.}
+
+proc `$`*(id: TagId): string {.raises: [].} =
+  ## renders well-known tag ids as their shorthand, any other id as ``<number>``
+  case id
+  of yTagQuestionMark: "?"
+  of yTagExclamationMark: "!"
+  of yTagString: "!!str"
+  of yTagSequence: "!!seq"
+  of yTagMapping: "!!map"
+  of yTagNull: "!!null"
+  of yTagBoolean: "!!bool"
+  of yTagInteger: "!!int"
+  of yTagFloat: "!!float"
+  of yTagOrderedMap: "!!omap"
+  of yTagPairs: "!!pairs"
+  of yTagSet: "!!set"
+  of yTagBinary: "!!binary"
+  of yTagMerge: "!!merge"
+  of yTagTimestamp: "!!timestamp"
+  of yTagValue: "!!value"
+  of yTagYaml: "!!yaml"
+  of yTagNimField: "!nim:field"
+  else: "<" & $int(id) & ">"
+
+proc `==`*(left: Event, right: Event): bool {.raises: [].} =
+  ## compares kind, node properties, scalar content and alias target.
+  ## Positions, styles and the document start/end fields are not compared.
+  if left.kind != right.kind: return false
+  case left.kind
+  of yamlStartStream, yamlEndStream, yamlStartDoc, yamlEndDoc, yamlEndMap, yamlEndSeq:
+    result = true
+  of yamlStartMap:
+    result = left.mapProperties == right.mapProperties
+  of yamlStartSeq:
+    result = left.seqProperties == right.seqProperties
+  of yamlScalar:
+    result = left.scalarProperties == right.scalarProperties and
+             left.scalarContent == right.scalarContent
+  of yamlAlias: result = left.aliasTarget == right.aliasTarget
+
+proc renderAttrs*(props: Properties, isPlain: bool = true): string =
+  ## renders anchor (``&name``) and tag (``<tag>``) in the notation used by
+  ## `$` for events. The ``?`` tag is never rendered; the ``!`` tag is only
+  ## rendered (as ``<!>``) when ``isPlain`` is true.
+  result = ""
+  if props.anchor != yAnchorNone: result &= " &" & $props.anchor
+  case props.tag
+  of yTagQuestionMark: discard
+  of yTagExclamationMark:
+    if isPlain: result &= " <!>"
+  else:
+    result &= " <" & $props.tag & ">"
+
+proc `$`*(event: Event): string {.raises: [].} =
+  ## outputs a human-readable string describing the given event.
+  ## This string is compatible to the format used in the yaml test suite.
+  case event.kind
+  of yamlStartStream: result = "+STR"
+  of yamlEndStream: result = "-STR"
+  of yamlEndMap: result = "-MAP"
+  of yamlEndSeq: result = "-SEQ"
+  of yamlStartDoc:
+    result = "+DOC"
+    if event.explicitDirectivesEnd: result &= " ---"
+  of yamlEndDoc:
+    result = "-DOC"
+    if event.explicitDocumentEnd: result &= " ..."
+  of yamlStartMap: result = "+MAP" & renderAttrs(event.mapProperties)
+  # a sequence start only has ``seqProperties``; reading ``mapProperties``
+  # here would access the wrong variant branch and raise ``FieldDefect``.
+  of yamlStartSeq: result = "+SEQ" & renderAttrs(event.seqProperties)
+  of yamlScalar:
+    result = "=VAL" & renderAttrs(event.scalarProperties,
+                                  event.scalarStyle == ssPlain or
+                                  event.scalarStyle == ssAny)
+    case event.scalarStyle
+    of ssPlain, ssAny: result &= " :"
+    of ssSingleQuoted: result &= " \'"
+    of ssDoubleQuoted: result &= " \""
+    of ssLiteral: result &= " |"
+    of ssFolded: result &= " >"
+    result &= yamlTestSuiteEscape(event.scalarContent)
+  of yamlAlias: result = "=ALI *" & $event.aliasTarget
\ No newline at end of file
diff --git a/yaml/parser.nim b/yaml/parser.nim
index c0c35fb..a9b0efb 100644
--- a/yaml/parser.nim
+++ b/yaml/parser.nim
@@ -11,22 +11,13 @@
 ## This is the low-level parser API. A ``YamlParser`` enables you to parse any
 ## non-nil string or Stream object as YAML character stream.

-import tables, strutils, macros, streams
-import taglib, stream, private/lex, private/internal
+import tables, strutils, macros
+import taglib, stream, private/lex, private/internal, data

 when defined(nimNoNil):
   {.experimental: "notnil".}

 type
-  WarningCallback* = proc(line, column: int, lineContent: string,
-                          message: string)
-    ## Callback for parser warnings. Currently, this callback may be called
-    ## on two occasions while parsing a YAML document stream:
-    ##
-    ## - If the version number in the ``%YAML`` directive does not match
-    ##   ``1.2``.
-    ## - If there is an unknown directive encountered.
-
   YamlParser* = ref object
     ## A parser object. Retains its ``TagLibrary`` across calls to
     ## `parse <#parse,YamlParser,Stream>`_. Can be used
@@ -34,41 +25,28 @@ type
     ## only until the document goes out of scope (i.e. until
     ## ``yamlEndDocument`` is yielded).
tagLib: TagLibrary
-    callback: WarningCallback
-    anchors: Table[string, AnchorId]
+    issueWarnings: bool
+    anchors: Table[string, Anchor]

-  FastParseLevelKind = enum
-    fplUnknown, fplSequence, fplMapKey, fplMapValue, fplSinglePairKey,
-    fplSinglePairValue, fplDocument
+  State = proc(c: Context, e: var Event): bool

-  FastParseLevel = object
-    kind: FastParseLevelKind
+  Level = object
+    state: State
     indentation: int

-  ParserContext = ref object of YamlStream
+  Context = ref object of YamlStream
     p: YamlParser
-    lex: YamlLexer
-    storedState: proc(s: YamlStream, e: var YamlStreamEvent): bool
-    atSequenceItem: bool
-    flowdepth: int
-    ancestry: seq[FastParseLevel]
-    level: FastParseLevel
-    tag: TagId
-    anchor: AnchorId
-    shorthands: Table[string, string]
-    nextAnchorId: AnchorId
-    newlines: int
-    explicitFlowKey: bool
-    plainScalarStart: tuple[line, column: int]
+    lex: Lexer
+    levels: seq[Level]

-  LevelEndResult = enum
-    lerNothing, lerOne, lerAdditionalMapEnd
+    headerProps, inlineProps: Properties
+    headerStart, inlineStart: Mark
+    blockIndentation: int

-  YamlLoadingError* = object of Exception
+  YamlLoadingError* = object of ValueError
     ## Base class for all exceptions that may be raised during the process
     ## of loading a YAML character stream.
-    line*: int ## line number (1-based) where the error was encountered
-    column*: int ## column number (1-based) where the error was encountered
+    mark*: Mark ## position at which the error has occurred.
     lineContent*: string ## \
       ## content of the line where the error was encountered. Includes a
       ## second line with a marker ``^`` at the position where the error
@@ -105,1020 +83,860 @@ type
   ## Some elements in this list are vague. For a detailed description of a
   ## valid YAML character stream, see the YAML specification.

+# interface
+
 proc newYamlParser*(tagLib: TagLibrary = initExtendedTagLibrary(),
-                    callback: WarningCallback = nil): YamlParser =
+                    issueWarnings: bool = false): YamlParser =
-  ## Creates a YAML parser. if ``callback`` is not ``nil``, it will be called
-  ## whenever the parser yields a warning.
+  ## Creates a YAML parser. ``issueWarnings`` is stored on the parser and is
+  ## meant to enable parser warnings (emitting them is still marked TODO).
   new(result)
   result.tagLib = tagLib
-  result.callback = callback
+  result.issueWarnings = issueWarnings
+
+# implementation

 template debug(message: string) {.dirty.} =
   when defined(yamlDebug):
     try: styledWriteLine(stdout, fgBlue, message)
     except IOError: discard

-proc generateError(c: ParserContext, message: string):
-    ref YamlParserError {.raises: [].} =
-  result = newException(YamlParserError, message)
-  (result.line, result.column) = c.lex.curStartPos
-  result.lineContent = c.lex.getTokenLine()
+const defaultProperties = (yAnchorNone, yTagQuestionMark)

-proc illegalToken(c: ParserContext, expected: string = ""):
-    ref YamlParserError {.raises: [].} =
-  var msg = "Illegal token"
-  if expected.len > 0: msg.add(" (expected " & expected & ")")
-  msg.add(": " & $c.lex.cur)
-  result = c.generateError(msg)
+proc isEmpty(props: Properties): bool =
+  ## true if neither an anchor nor a tag other than ``?`` is set
+  result = props.anchor == yAnchorNone and
+           props.tag == yTagQuestionMark

-proc callCallback(c: ParserContext, msg: string) {.raises: [YamlParserError].} =
+{.push gcSafe, locks: 0.}
+proc atStreamStart(c: Context, e: var Event): bool
+proc atStreamEnd(c: Context, e : var Event): bool
+proc beforeDoc(c: Context, e: var Event): bool
+proc beforeDocEnd(c: Context, e: var Event): bool
+proc afterDirectivesEnd(c: Context, e: var Event): bool
+proc beforeImplicitRoot(c: Context, e: var Event): bool
+proc atBlockIndentation(c: Context, e: var Event): bool
+proc beforeBlockIndentation(c: Context, e: var Event): bool
+proc beforeNodeProperties(c: Context, e: var Event): bool
+proc requireImplicitMapStart(c: Context, e: var Event): bool
+proc afterCompactParent(c: Context, e: var Event): bool
+proc afterCompactParentProps(c: Context, e: var Event): bool
+proc requireInlineBlockItem(c: Context, e: var Event): bool
+proc beforeFlowItemProps(c: Context, e: var Event): bool
+proc inBlockSeq(c: Context, e: var Event): bool
+proc beforeBlockMapValue(c: Context, e: var Event): bool
+proc atBlockIndentationProps(c: Context, e: var Event): bool
+proc afterFlowSeqSep(c: Context, e: var Event): bool
+proc afterFlowMapSep(c: Context, e: var Event): bool
+proc atBlockMapKeyProps(c: Context, e: var Event): bool
+proc afterImplicitKey(c: Context, e: var Event): bool
+proc afterBlockParent(c: Context, e: var Event): bool
+proc afterBlockParentProps(c: Context, e: var Event): bool
+proc afterImplicitPairStart(c: Context, e: var Event): bool
+proc beforePairValue(c: Context, e: var Event): bool
+proc atEmptyPairKey(c: Context, e: var Event): bool
+proc afterFlowMapValue(c: Context, e: var Event): bool
+proc afterFlowSeqSepProps(c: Context, e: var Event): bool
+proc afterFlowSeqItem(c: Context, e: var Event): bool
+proc afterPairValue(c: Context, e: var Event): bool
+{.pop.}
+
+proc init[T](pc: Context, source: T) {.inline.} =
+  ## pushes the initial state, resets pending properties, initializes the lexer
+  pc.levels.add(Level(state: atStreamStart, indentation: -2))
+  pc.headerProps = defaultProperties
+  pc.inlineProps = defaultProperties
+  pc.lex.init(source)
+
+proc generateError(c: Context, message: string):
+    ref YamlParserError {.raises: [].} =
+  ## builds a parser error located at the start of the current token
+  result = (ref YamlParserError)(
+    msg: message, parent: nil, mark: c.lex.curStartPos,
+    lineContent: c.lex.currentLine())
+
+proc parseTag(c: Context): TagId =
+  ## resolves the current tag handle plus the following suffix token to a
+  ## ``TagId``, registering the URI if the tag library does not know it yet
+  let handle = c.lex.fullLexeme()
+  var uri = c.p.tagLib.resolve(handle)
+  if uri == "":
+    raise c.generateError("unknown handle: " & escape(handle))
+  c.lex.next()
+  if c.lex.cur != Token.Suffix:
+    raise c.generateError("unexpected token (expected tag suffix): " & $c.lex.cur)
+  uri.add(c.lex.evaluated)
   try:
-    if not isNil(c.p.callback):
-      c.p.callback(c.lex.curStartPos.line, c.lex.curStartPos.column,
-                   c.lex.getTokenLine(), msg)
-  except:
-    var e = newException(YamlParserError,
-        "Warning callback raised exception: " & getCurrentExceptionMsg())
-    e.parent = getCurrentException()
-    raise e
+    return c.p.tagLib.tags[uri]
+  except KeyError:
+    return c.p.tagLib.registerUri(uri)

-proc initLevel(k:
FastParseLevelKind): FastParseLevel {.raises: [], inline.} =
-  FastParseLevel(kind: k, indentation: UnknownIndentation)
+proc toStyle(t: Token): ScalarStyle =
+  ## maps a scalar token to its scalar style; any other token gives ``ssAny``
+  return (case t
+    of Plain: ssPlain
+    of SingleQuoted: ssSingleQuoted
+    of DoubleQuoted: ssDoubleQuoted
+    of Literal: ssLiteral
+    of Folded: ssFolded
+    else: ssAny)

-proc emptyScalar(c: ParserContext): YamlStreamEvent {.raises: [], inline.} =
-  when defined(yamlScalarRepInd):
-    result = scalarEvent("", c.tag, c.anchor, srPlain)
-  else:
-    result = scalarEvent("", c.tag, c.anchor)
-  c.tag = yTagQuestionMark
-  c.anchor = yAnchorNone
+proc atStreamStart(c: Context, e: var Event): bool =
+  ## yields ``yamlStartStream`` and installs the stream end / document states
+  c.levels[0] = Level(state: atStreamEnd, indentation: -2)
+  c.levels.add(Level(state: beforeDoc, indentation: -1))
+  e = Event(startPos: c.lex.curStartPos, endPos: c.lex.curStartPos, kind: yamlStartStream)
+  return true

-proc currentScalar(c: ParserContext, e: var YamlStreamEvent)
-    {.raises: [], inline.} =
-  e = YamlStreamEvent(kind: yamlScalar, scalarTag: c.tag,
-                      scalarAnchor: c.anchor)
-  shallowCopy(e.scalarContent, c.lex.buf)
-  c.lex.buf = newStringOfCap(256)
-  c.tag = yTagQuestionMark
-  c.anchor = yAnchorNone
+proc atStreamEnd(c: Context, e : var Event): bool =
+  ## yields ``yamlEndStream``
+  e = Event(startPos: c.lex.curStartPos,
+            endPos: c.lex.curStartPos, kind: yamlEndStream)
+  return true

-proc objectStart(c: ParserContext, k: static[YamlStreamEventKind],
-                 single: bool = false): YamlStreamEvent {.raises: [].} =
-  yAssert(c.level.kind == fplUnknown)
-  when k == yamlStartMap:
-    result = startMapEvent(c.tag, c.anchor)
-    if single:
-      debug("started single-pair map at " &
-          (if c.level.indentation == UnknownIndentation:
-           $c.lex.indentation else: $c.level.indentation))
-      c.level.kind = fplSinglePairKey
+proc beforeDoc(c: Context, e: var Event): bool =
+  ## Consumes directives in front of a document. On ``---`` or on the first
+  ## content token it writes a ``yamlStartDoc`` event to ``e`` and returns
+  ## true; at the end of the stream it pops its level and returns false.
+  ## A ``...`` without preceding directives is skipped.
+  var version = ""
+  var seenDirectives = false
+  while true:
+    case c.lex.cur
+    of DocumentEnd:
+      if seenDirectives:
+        raise c.generateError("Missing `---` after directives")
+      c.lex.next()
+    of DirectivesEnd:
+      # the event must be produced before returning true, and its position
+      # has to be taken before the `---` token is consumed.
+      e = Event(startPos: c.lex.curStartPos, endPos: c.lex.curEndPos,
+                kind: yamlStartDoc, explicitDirectivesEnd: true,
+                version: version)
+      c.lex.next()
+      c.levels[1].state = beforeDocEnd
+      c.levels.add(Level(state: afterDirectivesEnd, indentation: -1))
+      return true
+    of StreamEnd:
+      discard c.levels.pop()
+      return false
+    of Indentation:
+      # implicit document start: zero-width event at the current token so
+      # that the ``Positive`` mark fields are never left at 0.
+      e = Event(startPos: c.lex.curStartPos, endPos: c.lex.curStartPos,
+                kind: yamlStartDoc, explicitDirectivesEnd: false, version: version)
+      c.levels[^1].state = beforeDocEnd
+      c.levels.add(Level(state: beforeImplicitRoot, indentation: -1))
+      return true
+    of YamlDirective:
+      seenDirectives = true
+      c.lex.next()
+      if c.lex.cur != Token.DirectiveParam:
+        raise c.generateError("Invalid token (expected YAML version string): " & $c.lex.cur)
+      elif version != "":
+        raise c.generateError("Duplicate %YAML")
+      version = c.lex.fullLexeme()
+      if version != "1.2" and c.p.issueWarnings:
+        discard # TODO
+      c.lex.next()
+    of TagDirective:
+      seenDirectives = true
+      c.lex.next()
+      if c.lex.cur != Token.TagHandle:
+        raise c.generateError("Invalid token (expected tag handle): " & $c.lex.cur)
+      let tagHandle = c.lex.fullLexeme()
+      c.lex.next()
+      if c.lex.cur != Token.Suffix:
+        raise c.generateError("Invalid token (expected tag URI): " & $c.lex.cur)
+      c.p.tagLib.registerHandle(tagHandle, c.lex.fullLexeme())
+      c.lex.next()
+    of UnknownDirective:
+      seenDirectives = true
+      # TODO: issue warning
+      while true:
+        c.lex.next()
+        if c.lex.cur != Token.DirectiveParam: break
     else:
-      debug("started map at " &
-          (if c.level.indentation == UnknownIndentation:
-           $c.lex.indentation else: $c.level.indentation))
-      c.level.kind = fplMapKey
-  else:
-    result = startSeqEvent(c.tag, c.anchor)
-    debug("started sequence at " &
-        (if c.level.indentation == UnknownIndentation: $c.lex.indentation else:
-         $c.level.indentation))
-    c.level.kind = fplSequence
-  c.tag = yTagQuestionMark
-  c.anchor = yAnchorNone
-  if c.level.indentation == UnknownIndentation:
-    c.level.indentation = c.lex.indentation
-  c.ancestry.add(c.level)
-  c.level = initLevel(fplUnknown)
+      raise c.generateError("Unexpected token (expected directive or document start): " & $c.lex.cur)
-proc initDocValues(c: ParserContext) {.raises: [].} = - c.shorthands = initTable[string, string]() - c.p.anchors = initTable[string, AnchorId]() - c.shorthands["!"] = "!" - c.shorthands["!!"] = "tag:yaml.org,2002:" - c.nextAnchorId = 0.AnchorId - c.level = initLevel(fplUnknown) - c.tag = yTagQuestionMark - c.anchor = yAnchorNone - c.ancestry.add(FastParseLevel(kind: fplDocument, indentation: -1)) - -proc advance(c: ParserContext) {.inline, raises: [YamlParserError].} = - try: c.lex.next() - except YamlLexerError: - let e = (ref YamlLexerError)(getCurrentException()) - let pe = newException(YamlParserError, e.msg) - pe.line = e.line - pe.column = e.column - pe.lineContent = e.lineContent - raise pe - -proc handleAnchor(c: ParserContext) {.raises: [YamlParserError].} = - if c.level.kind != fplUnknown: raise c.generateError("Unexpected token") - if c.anchor != yAnchorNone: - raise c.generateError("Only one anchor is allowed per node") - c.anchor = c.nextAnchorId - c.p.anchors[c.lex.buf] = c.anchor - c.nextAnchorId = AnchorId(int(c.nextAnchorId) + 1) - c.lex.buf.setLen(0) - c.advance() - -proc handleTagHandle(c: ParserContext) {.raises: [YamlParserError].} = - if c.level.kind != fplUnknown: raise c.generateError("Unexpected tag handle") - if c.tag != yTagQuestionMark: - raise c.generateError("Only one tag handle is allowed per node") - if c.lex.cur == ltTagHandle: - var tagUri = "" - try: - tagUri.add(c.shorthands[c.lex.buf[0..c.lex.shorthandEnd]]) - tagUri.add(c.lex.buf[c.lex.shorthandEnd + 1 .. 
^1]) - except KeyError: - raise c.generateError( - "Undefined tag shorthand: " & c.lex.buf[0..c.lex.shorthandEnd]) - try: c.tag = c.p.tagLib.tags[tagUri] - except KeyError: c.tag = c.p.tagLib.registerUri(tagUri) - else: - try: c.tag = c.p.tagLib.tags[c.lex.buf] - except KeyError: c.tag = c.p.tagLib.registerUri(c.lex.buf) - c.lex.buf.setLen(0) - c.advance() - -proc handlePossibleMapStart(c: ParserContext, e: var YamlStreamEvent, - flow: bool = false, single: bool = false): bool = - result = false - if c.level.indentation == UnknownIndentation: - if c.lex.isImplicitKeyStart(): - e = c.objectStart(yamlStartMap, single) - result = true - c.level.indentation = c.lex.indentation - -template implicitScalar(): YamlStreamEvent = - when defined(yamlScalarRepInd): - scalarEvent("", yTagQuestionMark, yAnchorNone, srPlain) - else: - scalarEvent("", yTagQuestionMark, yAnchorNone) - -proc handleMapKeyIndicator(c: ParserContext, e: var YamlStreamEvent): bool = - result = false - case c.level.kind - of fplUnknown: - e = c.objectStart(yamlStartMap) - result = true - of fplMapValue: - if c.level.indentation != c.lex.indentation: - raise c.generateError("Invalid p.indentation of map key indicator " & - "(expected" & $c.level.indentation & ", got " & $c.lex.indentation & - ")") - e = implicitScalar() - result = true - c.level.kind = fplMapKey - c.ancestry.add(c.level) - c.level = initLevel(fplUnknown) - of fplMapKey: - if c.level.indentation != c.lex.indentation: - raise c.generateError("Invalid p.indentation of map key indicator") - c.ancestry.add(c.level) - c.level = initLevel(fplUnknown) - of fplSequence: - raise c.generateError("Unexpected map key indicator (expected '- ')") - of fplSinglePairKey, fplSinglePairValue, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - c.advance() - if c.lex.cur != ltIndentation: - # this enables the parser to properly parse compact structures, like - # ? - a - # - b - # and such. 
At the first `-`, the indentation must equal its level to be - # parsed properly. - c.lex.indentation = c.lex.curStartPos.column - 1 - -proc handleBlockSequenceIndicator(c: ParserContext, e: var YamlStreamEvent): - bool = - result = false - case c.level.kind - of fplUnknown: - e = c.objectStart(yamlStartSeq) - result = true - of fplSequence: - if c.level.indentation != c.lex.indentation: - raise c.generateError( - "Invalid p.indentation of block sequence indicator (expected " & - $c.level.indentation & ", got " & $c.lex.indentation & ")") - c.ancestry.add(c.level) - c.level = initLevel(fplUnknown) - else: raise c.generateError("Illegal sequence item in map") - c.advance() - if c.lex.cur != ltIndentation: - # see comment in previous proc, this time with structures like - # - - a - # - b - c.lex.indentation = c.lex.curStartPos.column - 1 - -proc handleBlockItemStart(c: ParserContext, e: var YamlStreamEvent): bool = - result = false - case c.level.kind - of fplUnknown: - result = c.handlePossibleMapStart(e) - of fplSequence: - raise c.generateError( - "Unexpected token (expected block sequence indicator)") - of fplMapKey: - c.ancestry.add(c.level) - c.level = FastParseLevel(kind: fplUnknown, indentation: c.lex.indentation) - of fplMapValue: - e = emptyScalar(c) - result = true - c.level.kind = fplMapKey - c.ancestry.add(c.level) - c.level = FastParseLevel(kind: fplUnknown, indentation: c.lex.indentation) - of fplSinglePairKey, fplSinglePairValue, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - -proc handleFlowItemStart(c: ParserContext, e: var YamlStreamEvent): bool = - if c.level.kind == fplUnknown and - c.ancestry[c.ancestry.high].kind == fplSequence: - result = c.handlePossibleMapStart(e, true, true) - else: result = false - -proc handleFlowPlainScalar(c: ParserContext) = - while c.lex.cur in {ltScalarPart, ltEmptyLine}: - c.lex.newlines.inc() - c.advance() - c.lex.newlines = 0 - -proc lastTokenContext(s: YamlStream, line, column: var int, 
- lineContent: var string): bool = - let c = ParserContext(s) - line = c.lex.curStartPos.line - column = c.lex.curStartPos.column - lineContent = c.lex.getTokenLine(true) - result = true - -# --- macros for defining parser states --- - -template capitalize(s: string): string = - when declared(strutils.capitalizeAscii): strutils.capitalizeAscii(s) - else: strutils.capitalize(s) - -macro parserStates(names: varargs[untyped]) = - ## generates proc declaration for each state in list like this: - ## - ## proc name(s: YamlStream, e: var YamlStreamEvent): - ## bool {.raises: [YamlParserError].} - result = newStmtList() - for name in names: - let nameId = newIdentNode("state" & capitalize(name.strVal)) - result.add(newProc(nameId, [ident("bool"), newIdentDefs(ident("s"), - ident("YamlStream")), newIdentDefs(ident("e"), newNimNode(nnkVarTy).add( - ident("YamlStreamEvent")))], newEmptyNode())) - result[0][4] = newNimNode(nnkPragma).add(newNimNode(nnkExprColonExpr).add( - ident("raises"), newNimNode(nnkBracket).add(ident("YamlParserError"), - ident("YamlLexerError")))) - -proc processStateAsgns(source, target: NimNode) {.compileTime.} = - ## copies children of source to target and replaces all assignments - ## `state = [name]` with the appropriate code for changing states. 
- for child in source.children: - if child.kind == nnkAsgn and child[0].kind == nnkIdent: - if child[0].strVal == "state": - assert child[1].kind == nnkIdent - var newNameId: NimNode - if child[1].kind == nnkIdent and child[1].strVal == "stored": - newNameId = newDotExpr(ident("c"), ident("storedState")) - else: - newNameId = - newIdentNode("state" & capitalize(child[1].strVal)) - target.add(newAssignment(newDotExpr( - newIdentNode("s"), newIdentNode("nextImpl")), newNameId)) - continue - elif child[0].strVal == "stored": - assert child[1].kind == nnkIdent - let newNameId = - newIdentNode("state" & capitalize(child[1].strVal)) - target.add(newAssignment(newDotExpr(newIdentNode("c"), - newIdentNode("storedState")), newNameId)) - continue - var processed = copyNimNode(child) - processStateAsgns(child, processed) - target.add(processed) - -macro parserState(name: untyped, impl: untyped) = - ## Creates a parser state. Every parser state is a proc with the signature - ## - ## proc(s: YamlStream, e: var YamlStreamEvent): - ## bool {.raises: [YamlParserError].} - ## - ## The proc name will be prefixed with "state" and the original name will be - ## capitalized, so a state "foo" will yield a proc named "stateFoo". - ## - ## Inside the proc, you have access to the ParserContext with the let variable - ## `c`. You can change the parser state by a assignment `state = [newState]`. - ## The [newState] must have been declared with states(...) previously. 
- let - nameStr = name.strVal - nameId = newIdentNode("state" & capitalize(nameStr)) - var procImpl = quote do: - debug("state: " & `nameStr`) - if procImpl.kind == nnkStmtList and procImpl.len == 1: procImpl = procImpl[0] - procImpl = newStmtList(procImpl) - procImpl.add(newLetStmt(ident("c"), newCall("ParserContext", ident("s")))) - procImpl.add(newAssignment(newIdentNode("result"), newLit(false))) - assert impl.kind == nnkStmtList - processStateAsgns(impl, procImpl) - result = newProc(nameId, [ident("bool"), - newIdentDefs(ident("s"), ident("YamlStream")), newIdentDefs(ident("e"), - newNimNode(nnkVarTy).add(ident("YamlStreamEvent")))], procImpl) - -# --- parser states --- - -parserStates(initial, blockLineStart, blockObjectStart, blockAfterObject, - scalarEnd, plainScalarEnd, objectEnd, expectDocEnd, startDoc, - afterDocument, closeMoreIndentedLevels, afterPlainScalarYield, - emitEmptyScalar, tagHandle, anchor, alias, flow, leaveFlowMap, - leaveFlowSeq, flowAfterObject, leaveFlowSinglePairMap) - -proc closeEverything(c: ParserContext) = - c.lex.indentation = -1 - c.nextImpl = stateCloseMoreIndentedLevels - -proc endLevel(c: ParserContext, e: var YamlStreamEvent): - LevelEndResult = - result = lerOne - case c.level.kind - of fplSequence: e = endSeqEvent() - of fplMapKey: e = endMapEvent() - of fplMapValue, fplSinglePairValue: - e = emptyScalar(c) - c.level.kind = fplMapKey - result = lerAdditionalMapEnd - of fplUnknown: e = emptyScalar(c) - of fplDocument: - when defined(yamlScalarRepInd): - e = endDocEvent(c.lex.cur == ltDocumentEnd) - else: e = endDocEvent() - if c.lex.cur == ltDocumentEnd: c.advance() - of fplSinglePairKey: - internalError("Unexpected level kind: " & $c.level.kind) - -proc handleMapValueIndicator(c: ParserContext, e: var YamlStreamEvent): bool = - result = false - case c.level.kind - of fplUnknown: - if c.level.indentation == UnknownIndentation: - e = c.objectStart(yamlStartMap) - result = true - c.storedState = c.nextImpl - c.nextImpl = 
stateEmitEmptyScalar - else: - e = emptyScalar(c) - result = true - c.ancestry[c.ancestry.high].kind = fplMapValue - of fplMapKey: - if c.level.indentation != c.lex.indentation: - raise c.generateError("Invalid p.indentation of map key indicator") - e = implicitScalar() - result = true - c.level.kind = fplMapValue - c.ancestry.add(c.level) - c.level = initLevel(fplUnknown) - of fplMapValue: - if c.level.indentation != c.lex.indentation: - raise c.generateError("Invalid p.indentation of map key indicator") - c.ancestry.add(c.level) - c.level = initLevel(fplUnknown) - of fplSequence: - raise c.generateError("Unexpected map value indicator (expected '- ')") - of fplSinglePairKey, fplSinglePairValue, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - c.advance() - if c.lex.cur != ltIndentation: - # see comment in handleMapKeyIndicator, this time with structures like - # a: - a - # - b - c.lex.indentation = c.lex.curStartPos.column - 1 - -template handleObjectEnd(c: ParserContext, mayHaveEmptyValue: bool = false): - bool = - var result = false - c.level = c.ancestry.pop() - when mayHaveEmptyValue: - if c.level.kind == fplSinglePairValue: - result = true - c.level = c.ancestry.pop() - case c.level.kind - of fplMapKey: c.level.kind = fplMapValue - of fplSinglePairKey: c.level.kind = fplSinglePairValue - of fplMapValue: c.level.kind = fplMapKey - of fplSequence, fplDocument: discard - of fplUnknown, fplSinglePairValue: - internalError("Unexpected level kind: " & $c.level.kind) - result - -proc leaveFlowLevel(c: ParserContext, e: var YamlStreamEvent): bool = - c.flowdepth.dec() - result = (c.endLevel(e) == lerOne) # lerAdditionalMapEnd cannot happen - if c.flowdepth == 0: - c.lex.setFlow(false) - c.storedState = stateBlockAfterObject - else: - c.storedState = stateFlowAfterObject - c.nextImpl = stateObjectEnd - c.advance() - -parserState initial: +proc afterDirectivesEnd(c: Context, e: var Event): bool = case c.lex.cur - of ltYamlDirective: - 
c.advance() - assert c.lex.cur == ltYamlVersion, $c.lex.cur - if c.lex.buf != "1.2": - c.callCallback("Version is not 1.2, but " & c.lex.buf) - c.lex.buf.setLen(0) - c.advance() - of ltTagDirective: - c.advance() - assert c.lex.cur == ltTagShorthand - var tagShorthand: string - shallowCopy(tagShorthand, c.lex.buf) - c.lex.buf = "" - c.advance() - assert c.lex.cur == ltTagUri - c.shorthands[tagShorthand] = c.lex.buf - c.lex.buf.setLen(0) - c.advance() - of ltUnknownDirective: - c.callCallback("Unknown directive: " & c.lex.buf) - c.lex.buf.setLen(0) - c.advance() - if c.lex.cur == ltUnknownDirectiveParams: - c.lex.buf.setLen(0) - c.advance() - of ltIndentation: - e = startDocEvent() - result = true - state = blockObjectStart - of ltStreamEnd: c.isFinished = true - of ltDirectivesEnd: - when defined(yamlScalarRepInd): e = startDocEvent(true) - else: e = startDocEvent() - result = true - c.advance() - state = blockObjectStart - of ltDocumentEnd: - c.advance() - state = afterDocument - else: internalError("Unexpected lexer token: " & $c.lex.cur) - -parserState blockLineStart: - case c.lex.cur - of ltIndentation: c.advance() - of ltEmptyLine: c.advance() - of ltStreamEnd: - c.closeEverything() - stored = afterDocument + of TagHandle, VerbatimTag, Token.Anchor: + c.inlineStart = c.lex.curStartPos + c.levels.add(Level(state: beforeNodeProperties, indentation: 0)) + of Indentation: + c.headerStart = c.inlineStart + c.levels[^1].state = atBlockIndentation + c.levels.add(Level(state: beforeBlockIndentation, indentation: 0)) + of DocumentEnd: + e = scalarEvent("", c.inlineProps, ssPlain, c.lex.curStartPos, c.lex.curEndPos) + of Folded, Literal: + e = scalarEvent(c.lex.evaluated, c.inlineProps, + if c.lex.cur == Token.Folded: ssFolded else: ssLiteral, + c.lex.curStartPos, c.lex.curEndPos) else: - if c.lex.indentation <= c.ancestry[^1].indentation: - state = closeMoreIndentedLevels - stored = blockObjectStart - else: - state = blockObjectStart + raise c.generateError("Illegal 
content at `---`: " & $c.lex.cur) -parserState blockObjectStart: +proc beforeImplicitRoot(c: Context, e: var Event): bool = + if c.lex.cur != Token.Indentation: + raise c.generateError("Unexpected token (expected line start): " & $c.lex.cur) + c.inlineStart = c.lex.curEndPos + c.levels[^1].indentation = c.lex.indentation + c.lex.next() case c.lex.cur - of ltEmptyLine: c.advance() - of ltIndentation: - c.advance() - c.level.indentation = UnknownIndentation - state = blockLineStart - of ltDirectivesEnd: - c.closeEverything() - stored = startDoc - of ltDocumentEnd: - c.closeEverything() - stored = afterDocument - of ltMapKeyInd: - result = c.handleMapKeyIndicator(e) - of ltMapValInd: - result = c.handleMapValueIndicator(e) - of ltQuotedScalar: - result = c.handleBlockItemStart(e) - c.advance() - state = scalarEnd - of ltBlockScalarHeader: - c.lex.indentation = c.ancestry[^1].indentation - c.advance() - assert c.lex.cur in {ltBlockScalar, ltStreamEnd} - if c.level.indentation == UnknownIndentation: - c.level.indentation = c.lex.indentation - c.advance() - state = scalarEnd - of ltScalarPart: - let needsValueIndicator = c.level.kind == fplMapKey - result = c.handleBlockItemStart(e) - c.plainScalarStart = c.lex.curStartPos - while true: - c.advance() - case c.lex.cur - of ltIndentation: - if c.lex.indentation <= c.ancestry[^1].indentation: - if needsValueIndicator and - c.lex.indentation == c.ancestry[^1].indentation: - raise c.generateError("Illegal multiline implicit key") - break - c.lex.newlines.inc() - of ltScalarPart: discard - of ltEmptyLine: c.lex.newlines.inc() - else: break - if needsValueIndicator and c.lex.cur != ltMapValInd: - raise c.generateError("Missing mapping value indicator (`:`)") - c.lex.newlines = 0 - state = plainScalarEnd - stored = blockAfterObject - of ltSeqItemInd: - result = c.handleBlockSequenceIndicator(e) - of ltTagHandle, ltLiteralTag: - result = c.handleBlockItemStart(e) - state = tagHandle - stored = blockObjectStart - of ltAnchor: - 
result = c.handleBlockItemStart(e) - state = anchor - stored = blockObjectStart - of ltAlias: - result = c.handleBlockItemStart(e) - state = alias - stored = blockAfterObject - of ltBraceOpen, ltBracketOpen: - result = c.handleBlockItemStart(e) - c.lex.setFlow(true) - state = flow - of ltStreamEnd: - c.closeEverything() - stored = afterDocument - else: - raise c.generateError("Unexpected token: " & $c.lex.cur) - -parserState scalarEnd: - if c.tag == yTagQuestionMark: c.tag = yTagExclamationMark - c.currentScalar(e) - when defined(yamlScalarRepInd): - case c.lex.scalarKind - of skSingleQuoted: e.scalarRep = srSingleQuoted - of skDoubleQuoted: e.scalarRep = srDoubleQuoted - of skLiteral: e.scalarRep = srLiteral - of skFolded: e.scalarRep = srFolded - result = true - state = objectEnd - stored = blockAfterObject - -parserState plainScalarEnd: - c.currentScalar(e) - result = true - c.lastTokenContextImpl = proc(s: YamlStream, line, column: var int, - lineContent: var string): bool {.raises: [].} = - let c = ParserContext(s) - (line, column) = c.plainScalarStart - lineContent = c.lex.getTokenLine(c.plainScalarStart, true) - result = true - state = afterPlainScalarYield - stored = blockAfterObject - -parserState afterPlainScalarYield: - c.lastTokenContextImpl = lastTokenContext - state = objectEnd - -parserState blockAfterObject: - case c.lex.cur - of ltIndentation, ltEmptyLine: - c.advance() - state = blockLineStart - of ltMapValInd: - case c.level.kind - of fplUnknown: - e = c.objectStart(yamlStartMap) - result = true - of fplMapKey: - e = implicitScalar() - result = true - c.level.kind = fplMapValue - c.ancestry.add(c.level) - c.level = initLevel(fplUnknown) - of fplMapValue: - c.level.kind = fplMapValue - c.ancestry.add(c.level) - c.level = initLevel(fplUnknown) - of fplSequence: raise c.illegalToken("sequence item") - of fplSinglePairKey, fplSinglePairValue, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - c.advance() - state = 
blockObjectStart - of ltDirectivesEnd: - c.closeEverything() - stored = startDoc - of ltStreamEnd: - c.closeEverything() - stored = afterDocument - else: raise c.illegalToken("':', comment or line end") - -parserState objectEnd: - if c.handleObjectEnd(true): - e = endMapEvent() - result = true - if c.level.kind == fplDocument: state = expectDocEnd - else: state = stored - -parserState expectDocEnd: - case c.lex.cur - of ltIndentation, ltEmptyLine: c.advance() - of ltDirectivesEnd: - e = endDocEvent() - result = true - state = startDoc - c.ancestry.setLen(0) - of ltDocumentEnd: - when defined(yamlScalarRepInd): e = endDocEvent(true) - else: e = endDocEvent() - result = true - state = afterDocument - c.advance() - of ltStreamEnd: - e = endDocEvent() - result = true - c.isFinished = true - else: - raise c.generateError("Unexpected token (expected document end): " & - $c.lex.cur) - -parserState startDoc: - c.initDocValues() - when defined(yamlScalarRepInd): - e = startDocEvent(c.lex.cur == ltDirectivesEnd) - else: e = startDocEvent() - result = true - c.advance() - state = blockObjectStart - -parserState afterDocument: - case c.lex.cur - of ltStreamEnd: c.isFinished = true - of ltEmptyLine: c.advance() - else: - c.initDocValues() - state = initial - -parserState closeMoreIndentedLevels: - if c.ancestry.len > 0: - let parent = c.ancestry[c.ancestry.high] - if parent.indentation >= c.lex.indentation: - if c.lex.cur == ltSeqItemInd: - if (c.lex.indentation == c.level.indentation and - c.level.kind == fplSequence) or - (c.lex.indentation == parent.indentation and - c.level.kind == fplUnknown and parent.kind != fplSequence): - state = stored - debug("Not closing because sequence indicator") - return false - debug("Closing because parent.indentation (" & $parent.indentation & - ") >= indentation(" & $c.lex.indentation & ")") - case c.endLevel(e) - of lerNothing: discard - of lerOne: result = true - of lerAdditionalMapEnd: return true - discard c.handleObjectEnd(false) - 
return result - debug("Not closing level because parent.indentation (" & - $parent.indentation & ") < indentation(" & $c.lex.indentation & - ")") - if c.level.kind == fplDocument: state = expectDocEnd - else: state = stored - elif c.lex.indentation == c.level.indentation: - debug("Closing document") - let res = c.endLevel(e) - yAssert(res == lerOne) - result = true - state = stored - else: - state = stored - -parserState emitEmptyScalar: - e = implicitScalar() - result = true - state = stored - -parserState tagHandle: - c.handleTagHandle() - state = stored - -parserState anchor: - c.handleAnchor() - state = stored - -parserState alias: - if c.level.kind != fplUnknown: raise c.generateError("Unexpected token") - if c.anchor != yAnchorNone or c.tag != yTagQuestionMark: - raise c.generateError("Alias may not have anchor or tag") - var id: AnchorId - try: id = c.p.anchors[c.lex.buf] - except KeyError: raise c.generateError("Unknown anchor") - c.lex.buf.setLen(0) - e = aliasEvent(id) - c.advance() - result = true - state = objectEnd - -parserState flow: - case c.lex.cur - of ltBraceOpen: - if c.handleFlowItemStart(e): return true - e = c.objectStart(yamlStartMap) - result = true - c.flowdepth.inc() - c.explicitFlowKey = false - c.advance() - of ltBracketOpen: - if c.handleFlowItemStart(e): return true - e = c.objectStart(yamlStartSeq) - result = true - c.flowdepth.inc() - c.advance() - of ltBraceClose: - yAssert(c.level.kind == fplUnknown) - c.level = c.ancestry.pop() - state = leaveFlowMap - of ltBracketClose: - yAssert(c.level.kind == fplUnknown) - c.level = c.ancestry.pop() - state = leaveFlowSeq - of ltComma: - yAssert(c.level.kind == fplUnknown) - c.level = c.ancestry.pop() - case c.level.kind - of fplSequence: - e = c.emptyScalar() - result = true - of fplMapValue: - e = c.emptyScalar() - result = true - c.level.kind = fplMapKey - c.explicitFlowKey = false - of fplMapKey: - e = c.emptyScalar() - c.level.kind = fplMapValue - return true - of fplSinglePairValue: - e 
= c.emptyScalar() - result = true - c.level = c.ancestry.pop() - state = leaveFlowSinglePairMap - stored = flow - of fplUnknown, fplSinglePairKey, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - c.ancestry.add(c.level) - c.level = initLevel(fplUnknown) - c.advance() - of ltMapValInd: - c.level = c.ancestry.pop() - case c.level.kind - of fplSequence: - e = startMapEvent(c.tag, c.anchor) - result = true - debug("started single-pair map at " & - (if c.level.indentation == UnknownIndentation: - $c.lex.indentation else: $c.level.indentation)) - c.tag = yTagQuestionMark - c.anchor = yAnchorNone - if c.level.indentation == UnknownIndentation: - c.level.indentation = c.lex.indentation - c.ancestry.add(c.level) - c.level = initLevel(fplSinglePairKey) - of fplMapValue, fplSinglePairValue: - raise c.generateError("Unexpected token (expected ',')") - of fplMapKey: - e = c.emptyScalar() - result = true - c.level.kind = fplMapValue - of fplSinglePairKey: - e = c.emptyScalar() - result = true - c.level.kind = fplSinglePairValue - of fplUnknown, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - if c.level.kind != fplSinglePairKey: c.advance() - c.ancestry.add(c.level) - c.level = initLevel(fplUnknown) - of ltQuotedScalar: - if c.handleFlowItemStart(e): return true - if c.tag == yTagQuestionMark: c.tag = yTagExclamationMark - c.currentScalar(e) - when defined(yamlScalarRepInd): - case c.lex.scalarKind - of skSingleQuoted: e.scalarRep = srSingleQuoted - of skDoubleQuoted: e.scalarRep = srDoubleQuoted - of skLiteral: e.scalarRep = srLiteral - of skFolded: e.scalarRep = srFolded - result = true - state = objectEnd - stored = flowAfterObject - c.advance() - of ltTagHandle, ltLiteralTag: - if c.handleFlowItemStart(e): return true - c.handleTagHandle() - of ltAnchor: - if c.handleFlowItemStart(e): return true - c.handleAnchor() - of ltAlias: - state = alias - stored = flowAfterObject - of ltMapKeyInd: - if c.explicitFlowKey: - raise 
c.generateError("Duplicate '?' in flow mapping") - elif c.level.kind == fplUnknown: - case c.ancestry[c.ancestry.high].kind - of fplMapKey, fplMapValue, fplDocument: discard - of fplSequence: - e = c.objectStart(yamlStartMap, true) - result = true - else: - raise c.generateError("Unexpected token") - c.explicitFlowKey = true - c.advance() - of ltScalarPart: - if c.handleFlowItemStart(e): return true - c.handleFlowPlainScalar() - c.currentScalar(e) - result = true - state = objectEnd - stored = flowAfterObject - of ltEmptyLine: - c.advance() - else: - raise c.generateError("Unexpected token: " & $c.lex.cur) - -parserState leaveFlowMap: - case c.level.kind - of fplMapValue: - e = c.emptyScalar() - c.level.kind = fplMapKey - return true - of fplMapKey: - if c.tag != yTagQuestionMark or c.anchor != yAnchorNone or - c.explicitFlowKey: - e = c.emptyScalar() - c.level.kind = fplMapValue - c.explicitFlowKey = false - return true - of fplSequence: - raise c.generateError("Unexpected token (expected ']')") - of fplSinglePairValue: - raise c.generateError("Unexpected token (expected ']')") - of fplUnknown, fplSinglePairKey, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - result = c.leaveFlowLevel(e) - -parserState leaveFlowSeq: - case c.level.kind - of fplSequence: - if c.tag != yTagQuestionMark or c.anchor != yAnchorNone: - e = c.emptyScalar() - return true - of fplSinglePairValue: - e = c.emptyScalar() - c.level = c.ancestry.pop() - state = leaveFlowSinglePairMap - stored = leaveFlowSeq - return true - of fplMapKey, fplMapValue: - raise c.generateError("Unexpected token (expected '}')") - of fplUnknown, fplSinglePairKey, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - result = c.leaveFlowLevel(e) - -parserState leaveFlowSinglePairMap: - e = endMapEvent() - result = true - state = stored - -parserState flowAfterObject: - case c.lex.cur - of ltBracketClose: - case c.level.kind - of fplSequence: discard - of fplMapKey, 
fplMapValue: - raise c.generateError("Unexpected token (expected '}')") - of fplSinglePairValue: - c.level = c.ancestry.pop() - yAssert(c.level.kind == fplSequence) - e = endMapEvent() - return true - of fplUnknown, fplSinglePairKey, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - result = c.leaveFlowLevel(e) - of ltBraceClose: - case c.level.kind - of fplMapKey, fplMapValue: discard - of fplSequence, fplSinglePairValue: - raise c.generateError("Unexpected token (expected ']')") - of fplUnknown, fplSinglePairKey, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - # we need the extra state for possibly emitting an additional empty value. - state = leaveFlowMap + of SeqItemInd, MapKeyInd, MapValueInd: + c.levels[^1].state = afterCompactParent + return false + of scalarTokenKind: + c.levels[^1].state = requireImplicitMapStart + return false + of nodePropertyKind: + c.levels[^1].state = requireImplicitMapStart + c.levels.add(Level(state: beforeNodeProperties, indentation: 0)) + of MapStart, SeqStart: + c.levels[^1].state = afterCompactParentProps return false - of ltComma: - case c.level.kind - of fplSequence: discard - of fplMapValue: - e = implicitScalar() - result = true - c.level.kind = fplMapKey - c.explicitFlowKey = false - of fplSinglePairValue: - c.level = c.ancestry.pop() - yAssert(c.level.kind == fplSequence) - e = endMapEvent() - result = true - of fplMapKey: c.explicitFlowKey = false - of fplUnknown, fplSinglePairKey, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - c.ancestry.add(c.level) - c.level = initLevel(fplUnknown) - state = flow - c.advance() - of ltMapValInd: - c.explicitFlowKey = false - case c.level.kind - of fplSequence, fplMapKey: - raise c.generateError("Unexpected token (expected ',')") - of fplMapValue, fplSinglePairValue: discard - of fplUnknown, fplSinglePairKey, fplDocument: - internalError("Unexpected level kind: " & $c.level.kind) - c.ancestry.add(c.level) - 
c.level = initLevel(fplUnknown) - state = flow - c.advance() - of ltStreamEnd: - raise c.generateError("Unclosed flow content") else: - raise c.generateError("Unexpected content (expected flow indicator)") + raise c.generateError("Unexpected token (expected collection start): " & $c.lex.cur) -# --- parser initialization --- - -proc init(c: ParserContext, p: YamlParser) {.raises: [YamlParserError].} = - # this try/except should not be necessary because basicInit cannot raise - # anything. however, compiling to JS does not work without it. - try: c.basicInit(lastTokenContext) - except: discard - c.p = p - c.ancestry = newSeq[FastParseLevel]() - c.initDocValues() - c.flowdepth = 0 - c.nextImpl = stateInitial - c.explicitFlowKey = false - c.advance() - -when not defined(JS): - proc parse*(p: YamlParser, s: Stream): YamlStream - {.raises: [YamlParserError].} = - ## Parse the given stream as YAML character stream. - let c = new(ParserContext) - try: c.lex = newYamlLexer(s) - except: - let e = newException(YamlParserError, - "Error while opening stream: " & getCurrentExceptionMsg()) - e.parent = getCurrentException() - e.line = 1 - e.column = 1 - e.lineContent = "" - raise e - c.init(p) - result = c - -proc parse*(p: YamlParser, str: string): YamlStream - {.raises: [YamlParserError].} = - ## Parse the given string as YAML character stream. - let c = new(ParserContext) - c.lex = newYamlLexer(str) - c.init(p) - result = c - -proc anchorName*(p: YamlParser, anchor: AnchorId): string {.raises: [].} = - ## Retrieve the textual representation of the given anchor as it occurred in - ## the input (without the leading `&`). Returns the empty string for unknown - ## anchors. 
- for representation, value in p.anchors: - if value == anchor: return representation - return "" - -proc renderAttrs(p: YamlParser, tag: TagId, anchor: AnchorId, - isPlain: bool): string = - result = "" - if anchor != yAnchorNone: result &= " &" & p.anchorName(anchor) - case tag - of yTagQuestionmark: discard - of yTagExclamationmark: - when defined(yamlScalarRepInd): - if isPlain: result &= " " +proc requireImplicitMapStart(c: Context, e: var Event): bool = + c.levels[^1].indentation = c.lex.indentation + case c.lex.cur + of Alias: + e = aliasEvent(c.lex.shortLexeme().Anchor, c.inlineStart, c.lex.curEndPos) + let headerEnd = c.lex.curStartPos + c.lex.next() + if c.lex.cur == Token.MapValueInd: + c.peek = e + e = startMapEvent(csBlock, c.headerProps, c.headerStart, headerEnd) + c.headerProps = defaultProperties + c.levels[^1].state = afterImplicitKey + else: + if not isEmpty(c.headerProps): + raise c.generateError("Alias may not have properties") + discard c.levels.pop() + return true + of Plain, SingleQuoted, DoubleQuoted: + e = scalarEvent(c.lex.evaluated, c.inlineProps, toStyle(c.lex.cur), + c.inlineStart, c.lex.curEndPos) + c.inlineProps = defaultProperties + let headerEnd = c.lex.curStartPos + c.lex.next() + case c.lex.cur + of Token.MapValueInd: + if c.lex.lastScalarWasMultiline(): + raise c.generateError("Implicit mapping key may not be multiline") + c.peek = move(e) + e = startMapEvent(csBlock, c.headerProps, + c.headerStart, headerEnd) + c.headerProps = defaultProperties + c.levels[^1].state = afterImplicitKey + of Indentation, DocumentEnd, DirectivesEnd, StreamEnd: + raise c.generateError("Scalar at root level requires `---`") + else: discard + return true + of MapStart, SeqStart: + c.levels[^1].state = beforeFlowItemProps + return false + of Indentation: + raise c.generateError("Standalone node properties not allowed on non-header line") else: - result &= " <" & p.taglib.uri(tag) & ">" + raise c.generateError("Unexpected token (expected implicit mapping 
key): " & $c.lex.cur) -proc display*(p: YamlParser, event: YamlStreamEvent): string - {.raises: [KeyError].} = +proc atBlockIndentation(c: Context, e: var Event): bool = + if c.blockIndentation == c.levels[^1].indentation and + (c.lex.cur != Token.SeqItemInd or + c.levels[^3].state == inBlockSeq): + e = scalarEvent(c.lex.evaluated, c.headerProps, ssPlain, + c.headerStart, c.headerStart) + c.headerProps = defaultProperties + discard c.levels.pop() + discard c.levels.pop() + return true + c.inlineStart = c.lex.curStartPos + c.levels[^1].indentation = c.lex.indentation + case c.lex.cur + of nodePropertyKind: + if isEmpty(c.headerProps): + c.levels[^1].state = requireInlineBlockItem + else: + c.levels[^1].state = requireImplicitMapStart + c.levels.add(Level(state: beforeBlockIndentation, indentation: 0)) + return false + of SeqItemInd: + e = startSeqEvent(csBlock, c.headerProps, + c.headerStart, c.lex.curEndPos) + c.headerProps = defaultProperties + c.levels[^1] = Level(state: inBlockSeq, indentation: c.lex.indentation) + c.levels.add(Level(state: beforeBlockIndentation, indentation: 0)) + c.levels.add(Level(state: afterCompactParent, indentation: c.lex.indentation)) + c.lex.next() + return true + of MapKeyInd: + e = startMapEvent(csBlock, c.headerProps, + c.headerStart, c.lex.curEndPos) + c.headerProps = defaultProperties + c.levels[^1] = Level(state: beforeBlockMapValue, indentation: 0) + c.levels.add(Level(state: beforeBlockIndentation)) + c.levels.add(Level(state: afterCompactParent, indentation: c.lex.indentation)) + c.lex.next() + of Plain, SingleQuoted, DoubleQuoted: + c.levels[^1].indentation = c.lex.indentation + e = scalarEvent(c.lex.evaluated, c.headerProps, + toStyle(c.lex.cur), c.inlineStart, c.lex.curEndPos) + c.headerProps = defaultProperties + let headerEnd = c.lex.curStartPos + c.lex.next() + if c.lex.cur == Token.MapValueInd: + if c.lex.lastScalarWasMultiline(): + raise c.generateError("Implicit mapping key may not be multiline") + let props = 
e.scalarProperties + e.scalarProperties = defaultProperties + c.peek = move(e) + e = startMapEvent(csBlock, props, c.headerStart, headerEnd) + c.levels[^1].state = afterImplicitKey + else: + discard c.levels.pop() + return true + of Alias: + e = aliasEvent(c.lex.shortLexeme().Anchor, c.inlineStart, c.lex.curEndPos) + c.inlineProps = defaultProperties + let headerEnd = c.lex.curStartPos + c.lex.next() + if c.lex.cur == Token.MapValueInd: + c.peek = move(e) + e = startMapEvent(csBlock, c.headerProps, c.headerStart, headerEnd) + c.headerProps = defaultProperties + c.levels[^1].state = afterImplicitKey + elif not isEmpty(c.headerProps): + raise c.generateError("Alias may not have properties") + else: + discard c.levels.pop() + return true + else: + c.levels[^1].state = atBlockIndentationProps + +proc atBlockIndentationProps(c: Context, e: var Event): bool = + c.levels[^1].indentation = c.lex.indentation + case c.lex.cur + of MapValueInd: + c.peek = scalarEvent("", c.inlineProps, ssPlain, c.inlineStart, c.lex.curEndPos) + c.inlineProps = defaultProperties + e = startMapEvent(csBlock, c.headerProps, c.lex.curStartPos, c.lex.curEndPos) + c.headerProps = defaultProperties + c.levels[^1].state = afterImplicitKey + return true + of Plain, SingleQuoted, DoubleQuoted: + e = scalarEvent(c.lex.evaluated, c.inlineProps, toStyle(c.lex.cur), c.inlineStart, c.lex.curEndPos) + c.inlineProps = defaultProperties + let headerEnd = c.lex.curStartPos + c.lex.next() + if c.lex.cur == Token.MapValueInd: + if c.lex.lastScalarWasMultiline(): + raise c.generateError("Implicit mapping key may not be multiline") + c.peek = move(e) + e = startMapEvent(csBlock, c.headerProps, c.headerStart, headerEnd) + c.headerProps = defaultProperties + c.levels[^1].state = afterImplicitKey + else: + discard c.levels.pop() + return true + of MapStart: + e = startMapEvent(csFlow, c.headerProps, c.headerStart, c.lex.curEndPos) + c.headerProps = defaultProperties + c.levels[^1].state = afterFlowMapSep + 
c.lex.next() + return true + of SeqStart: + e = startSeqEvent(csFlow, c.headerProps, c.headerStart, c.lex.curEndPos) + c.headerProps = defaultProperties + c.levels[^1].state = afterFlowSeqSep + c.lex.next() + return true + else: + raise c.generateError("Unexpected token (expected block content): " & $c.lex.cur) + +proc beforeNodeProperties(c: Context, e: var Event): bool = + case c.lex.cur + of TagHandle: + if c.inlineProps.tag != yTagQuestionMark: + raise c.generateError("Only one tag allowed per node") + c.inlineProps.tag = c.parseTag() + of VerbatimTag: + if c.inlineProps.tag != yTagQuestionMark: + raise c.generateError("Only one tag allowed per node") + try: + c.inlineProps.tag = c.p.taglib.tags[c.lex.evaluated] + except KeyError: + c.inlineProps.tag = c.p.taglib.registerUri(c.lex.evaluated) + of Token.Anchor: + if c.inlineProps.anchor != yAnchorNone: + raise c.generateError("Only one anchor allowed per node") + c.inlineProps.anchor = c.lex.shortLexeme().Anchor + of Indentation: + c.headerProps = c.inlineProps + c.inlineProps = defaultProperties + discard c.levels.pop() + return false + of Alias: + raise c.generateError("Alias may not have node properties") + else: + discard c.levels.pop() + return false + c.lex.next() + return false + +proc afterCompactParent(c: Context, e: var Event): bool = + c.inlineStart = c.lex.curStartPos + case c.lex.cur + of nodePropertyKind: + c.levels[^1].state = afterCompactParentProps + c.levels.add(Level(state: beforeNodeProperties)) + of SeqItemInd: + e = startSeqEvent(csBlock, c.headerProps, c.headerStart, c.lex.curEndPos) + c.headerProps = defaultProperties + c.levels[^1] = Level(state: inBlockSeq, indentation: c.lex.indentation) + c.levels.add(Level(state: beforeBlockIndentation)) + c.levels.add(Level(state: afterCompactParent)) + c.lex.next() + return true + of MapKeyInd: + e = startMapEvent(csBlock, c.headerProps, c.headerStart, c.lex.curEndPos) + c.headerProps = defaultProperties + c.levels[^1] = Level(state: 
beforeBlockMapValue, indentation: c.lex.indentation) + c.levels.add(Level(state: beforeBlockIndentation)) + c.levels.add(Level(state: afterCompactParent)) + return true + else: + c.levels[^1].state = afterCompactParentProps + return false + +proc afterCompactParentProps(c: Context, e: var Event): bool = + c.levels[^1].indentation = c.lex.indentation + case c.lex.cur + of nodePropertyKind: + c.levels.add(Level(state: beforeNodeProperties)) + return false + of Indentation: + c.headerStart = c.inlineStart + c.levels[^1] = Level(state: atBlockIndentation, indentation: c.levels[^3].indentation) + c.levels.add(Level(state: beforeBlockIndentation)) + return false + of StreamEnd, DocumentEnd, DirectivesEnd: + e = scalarEvent("", c.inlineProps, ssPlain, c.inlineStart, c.lex.curStartPos) + c.inlineProps = defaultProperties + discard c.levels.pop() + return true + of MapValueInd: + c.peek = scalarEvent("", c.inlineProps, ssPlain, c.inlineStart, c.lex.curStartPos) + c.inlineProps = defaultProperties + e = startMapEvent(csBlock, defaultProperties, c.lex.curStartPos, c.lex.curStartPos) + c.levels[^1].state = afterImplicitKey + return true + of Alias: + e = aliasEvent(c.lex.shortLexeme().Anchor, c.inlineStart, c.lex.curEndPos) + let headerEnd = c.lex.curStartPos + c.lex.next() + if c.lex.cur == Token.MapValueInd: + c.peek = move(e) + e = startMapEvent(csBlock, defaultProperties, headerEnd, headerEnd) + c.levels[^1].state = afterImplicitKey + else: + discard c.levels.pop() + return true + of scalarTokenKind: + e = scalarEvent(c.lex.evaluated, c.inlineProps, toStyle(c.lex.cur), + c.inlineStart, c.lex.curEndPos) + c.inlineProps = defaultProperties + let headerEnd = c.lex.curStartPos + c.levels[^1].indentation = c.lex.indentation + c.lex.next() + if c.lex.cur == Token.MapValueInd: + if c.lex.lastScalarWasMultiline(): + raise c.generateError("Implicit mapping key may not be multiline") + c.peek = move(e) + e = startMapEvent(csBlock, defaultProperties, headerEnd, headerEnd) + 
c.levels[^1].state = afterImplicitKey + else: + discard c.levels.pop() + return true + of MapStart: + e = startMapEvent(csFlow, c.inlineProps, c.inlineStart, c.lex.curEndPos) + c.inlineProps = defaultProperties + c.levels[^1].state = afterFlowMapSep + c.lex.next() + return true + of SeqStart: + e = startSeqEvent(csFlow, c.inlineProps, c.inlineStart, c.lex.curEndPos) + c.inlineProps = defaultProperties + c.levels[^1].state = afterFlowSeqSep + c.lex.next() + return true + else: + raise c.generateError("Unexpected token (expected newline or flow item start: " & $c.lex.cur) + +proc afterBlockParent(c: Context, e: var Event): bool = + c.inlineStart = c.lex.curStartPos + case c.lex.cur + of nodePropertyKind: + c.levels[^1].state = afterBlockParentProps + c.levels.add(Level(state: beforeNodeProperties)) + of SeqItemInd, MapKeyInd: + raise c.generateError("Compact notation not allowed after implicit key") + else: + c.levels[^1].state = afterBlockParentProps + return false + +proc afterBlockParentProps(c: Context, e: var Event): bool = + c.levels[^1].indentation = c.lex.indentation + case c.lex.cur + of nodePropertyKind: + c.levels.add(Level(state: beforeNodeProperties)) + return false + of MapValueInd: + raise c.generateError("Compact notation not allowed after implicit key") + of scalarTokenKind: + e = scalarEvent(c.lex.evaluated, c.inlineProps, toStyle(c.lex.cur), c.inlineStart, c.lex.curEndPos) + c.inlineProps = defaultProperties + c.lex.next() + if c.lex.cur == Token.MapValueInd: + raise c.generateError("Compact notation not allowed after implicit key") + discard c.levels.pop() + return true + else: + c.levels[^1].state = afterCompactParentProps + return false + +proc requireInlineBlockItem(c: Context, e: var Event): bool = + c.levels[^1].indentation = c.lex.indentation + case c.lex.cur + of Indentation: + raise c.generateError("Node properties may not stand alone on a line") + else: + c.levels[^1].state = afterCompactParentProps + return false + +proc beforeDocEnd(c: 
Context, e: var Event): bool = + case c.lex.cur + of DocumentEnd: + e = endDocEvent(false, c.lex.curStartPos, c.lex.curEndPos) + c.levels[^1].state = beforeDoc + c.lex.next() + of StreamEnd: + e = endDocEvent(true, c.lex.curStartPos, c.lex.curEndPos) + discard c.levels.pop() + of DirectivesEnd: + e = endDocEvent(true, c.lex.curStartPos, c.lex.curStartPos) + c.levels[^1].state = beforeDoc + else: + raise c.generateError("Unexpected token (expected document end): " & $c.lex.cur) + return true + +proc inBlockSeq(c: Context, e: var Event): bool = + if c.blockIndentation > c.levels[^1].indentation: + raise c.generateError("Invalid indentation: got " & $c.blockIndentation & ", expected " & $c.levels[^1].indentation) + case c.lex.cur + of SeqItemInd: + c.lex.next() + c.levels.add(Level(state: beforeBlockIndentation)) + c.levels.add(Level(state: afterCompactParent, indentation: c.blockIndentation)) + return false + else: + if c.levels[^3].indentation == c.levels[^1].indentation: + e = endSeqEvent(c.lex.curStartPos, c.lex.curEndPos) + discard c.levels.pop() + discard c.levels.pop() + else: + raise c.generateError("Illegal token (expected block sequence indicator): " & $c.lex.cur) + +proc beforeBlockMapKey(c: Context, e: var Event): bool = + if c.blockIndentation > c.levels[^1].indentation: + raise c.generateError("Invalid indentation: got " & $c.blockIndentation & ", expected " & $c.levels[^1].indentation) + case c.lex.cur + of MapKeyInd: + c.levels[^1].state = beforeBlockMapValue + c.levels.add(Level(state: beforeBlockIndentation)) + c.levels.add(Level(state: afterCompactParent, indentation: c.blockIndentation)) + c.lex.next() + return false + of nodePropertyKind: + c.levels[^1].state = atBlockMapKeyProps + c.levels.add(Level(state: beforeNodeProperties)) + return false + of Plain, SingleQuoted, DoubleQuoted: + c.levels[^1].state = atBlockMapKeyProps + return false + of Alias: + e = aliasEvent(c.lex.shortLexeme().Anchor, c.inlineStart, c.lex.curEndPos) + c.lex.next() + 
c.levels[^1].state = afterImplicitKey + return true + of MapValueInd: + e = scalarEvent("", defaultProperties, ssPlain, c.lex.curStartPos, c.lex.curEndPos) + c.levels[^1].state = beforeBlockMapValue + return true + else: + raise c.generateError("Unexpected token (expected mapping key): " & $c.lex.cur) + +proc atBlockMapKeyProps(c: Context, e: var Event): bool = + case c.lex.cur + of nodePropertyKind: + c.levels.add(Level(state: beforeNodeProperties)) + of Alias: + e = aliasEvent(c.lex.shortLexeme().Anchor, c.inlineStart, c.lex.curEndPos) + of Plain, SingleQuoted, DoubleQuoted: + e = scalarEvent(c.lex.evaluated, c.inlineProps, toStyle(c.lex.cur), c.inlineStart, c.lex.curEndPos) + c.inlineProps = defaultProperties + if c.lex.lastScalarWasMultiline(): + raise c.generateError("Implicit mapping key may not be multiline") + of MapValueInd: + e = scalarEvent("", c.inlineProps, ssPlain, c.inlineStart, c.lex.curStartPos) + c.inlineProps = defaultProperties + c.levels[^1].state = afterImplicitKey + return true + else: + raise c.generateError("Unexpected token (expected implicit mapping key): " & $c.lex.cur) + c.lex.next() + c.levels[^1].state = afterImplicitKey + return true + +proc afterImplicitKey(c: Context, e: var Event): bool = + if c.lex.cur != Token.MapValueInd: + raise c.generateError("Unexpected token (expected ':'): " & $c.lex.cur) + c.lex.next() + c.levels[^1].state = beforeBlockMapKey + c.levels.add(Level(state: beforeBlockIndentation)) + c.levels.add(Level(state: afterBlockParent, indentation: c.blockIndentation)) + return false + +proc beforeBlockMapValue(c: Context, e: var Event): bool = + if c.blockIndentation > c.levels[^1].indentation: + raise c.generateError("Invalid indentation") + case c.lex.cur + of MapValueInd: + c.levels[^1].state = beforeBlockMapKey + c.levels.add(Level(state: beforeBlockIndentation)) + c.levels.add(Level(state: afterCompactParent, indentation: c.blockIndentation)) + c.lex.next() + of MapKeyInd, Plain, SingleQuoted, DoubleQuoted, 
nodePropertyKind: + # the value is allowed to be missing after an explicit key + e = scalarEvent("", defaultProperties, ssPlain, c.lex.curStartPos, c.lex.curEndPos) + c.levels[^1].state = beforeBlockMapKey + return true + else: + raise c.generateError("Unexpected token (expected mapping value): " & $c.lex.cur) + +proc beforeBlockIndentation(c: Context, e: var Event): bool = + proc endBlockNode() = + if c.levels[^1].state == beforeBlockMapKey: + e = endMapEvent(c.lex.curStartPos, c.lex.curEndPos) + elif c.levels[^1].state == beforeBlockMapValue: + e = scalarEvent("", defaultProperties, ssPlain, c.lex.curStartPos, c.lex.curEndPos) + c.levels[^1].state = beforeBlockMapKey + c.levels.add(Level(state: beforeBlockIndentation)) + return + elif c.levels[^1].state == inBlockSeq: + e = endSeqEvent(c.lex.curStartPos, c.lex.curEndPos) + elif c.levels[^1].state == atBlockIndentation: + e = scalarEvent("", c.headerProps, ssPlain, c.headerStart, c.headerStart) + c.headerProps = defaultProperties + elif c.levels[^1].state == beforeBlockIndentation: + raise c.generateError("Unexpected double beforeBlockIndentation") + else: + raise c.generateError("Internal error (please report this bug)") + discard c.levels.pop() + discard c.levels.pop() + case c.lex.cur + of Indentation: + c.blockIndentation = c.lex.indentation + if c.blockIndentation < c.levels[^1].indentation: + endBlockNode() + return true + else: + c.lex.next() + return false + of StreamEnd, DocumentEnd, DirectivesEnd: + c.blockIndentation = 0 + if c.levels[^1].state != beforeDocEnd: + endBlockNode() + return true + else: + return false + else: + raise c.generateError("Unexpected content after node in block context (expected newline): " & $c.lex.cur) + +proc beforeFlowItem(c: Context, e: var Event): bool = + c.inlineStart = c.lex.curStartPos + case c.lex.cur + of nodePropertyKind: + c.levels[^1].state = beforeFlowItemProps + c.levels.add(Level(state: beforeNodeProperties)) + of Alias: + e = 
aliasEvent(c.lex.shortLexeme().Anchor, c.inlineStart, c.lex.curEndPos) + c.lex.next() + discard c.levels.pop() + return true + else: + c.levels[^1].state = beforeFlowItemProps + return false + +proc beforeFlowItemProps(c: Context, e: var Event): bool = + case c.lex.cur + of nodePropertyKind: + c.levels.add(Level(state: beforeNodeProperties)) + of Alias: + e = aliasEvent(c.lex.shortLexeme().Anchor, c.inlineStart, c.lex.curEndPos) + c.lex.next() + discard c.levels.pop() + of scalarTokenKind: + e = scalarEvent(c.lex.evaluated, c.inlineProps, toStyle(c.lex.cur), c.inlineStart, c.lex.curEndPos) + c.lex.next() + discard c.levels.pop() + of MapStart: + e = startMapEvent(csFlow, c.inlineProps, c.inlineStart, c.lex.curEndPos) + c.levels[^1].state = afterFlowMapSep + c.lex.next() + of SeqStart: + e = startSeqEvent(csFlow, c.inlineProps, c.inlineStart, c.lex.curEndPos) + c.levels[^1].state = afterFlowSeqSep + c.lex.next() + of MapEnd, SeqEnd, SeqSep, MapValueInd: + e = scalarEvent("", c.inlineProps, ssPlain, c.inlineStart, c.lex.curEndPos) + discard c.levels.pop() + else: + raise c.generateError("Unexpected token (expected flow node): " & $c.lex.cur) + c.inlineProps = defaultProperties + return true + +proc afterFlowMapKey(c: Context, e: var Event): bool = + case c.lex.cur + of MapValueInd: + c.levels[^1].state = afterFlowMapValue + c.levels.add(Level(state: beforeFlowItem)) + c.lex.next() + return false + of SeqSep, MapEnd: + e = scalarEvent("", defaultProperties, ssPlain, c.lex.curStartPos, c.lex.curEndPos) + c.levels[^1].state = afterFlowMapValue + return true + else: + raise c.generateError("Unexpected token (expected ':'): " & $c.lex.cur) + +proc afterFlowMapValue(c: Context, e: var Event): bool = + case c.lex.cur + of SeqSep: + c.levels[^1].state = afterFlowMapSep + c.lex.next() + return false + of MapEnd: + e = endMapEvent(c.lex.curStartPos, c.lex.curEndPos) + c.lex.next() + discard c.levels.pop() + return true + of Plain, SingleQuoted, DoubleQuoted, MapKeyInd, 
Token.Anchor, Alias, MapStart, SeqStart: + raise c.generateError("Missing ','") + else: + raise c.generateError("Unexpected token (expected ',' or '}'): " & $c.lex.cur) + +proc afterFlowSeqItem(c: Context, e: var Event): bool = + case c.lex.cur + of SeqSep: + c.levels[^1].state = afterFlowSeqSep + c.lex.next() + return false + of SeqEnd: + e = endSeqEvent(c.lex.curStartPos, c.lex.curEndPos) + c.lex.next() + discard c.levels.pop() + return true + of Plain, SingleQuoted, DoubleQuoted, MapKeyInd, Token.Anchor, Alias, MapStart, SeqStart: + raise c.generateError("Missing ','") + else: + raise c.generateError("Unexpected token (expected ',' or ']'): " & $c.lex.cur) + +proc afterFlowMapSep(c: Context, e: var Event): bool = + case c.lex.cur + of MapKeyInd: + c.lex.next() + of MapEnd: + e = endMapEvent(c.lex.curStartPos, c.lex.curEndPos) + c.lex.next() + discard c.levels.pop() + return true + else: discard + c.levels[^1].state = afterFlowMapKey + c.levels.add(Level(state: beforeFlowItem)) + return false + +proc possibleNextSequenceItem(c: Context, e: var Event, endToken: Token, afterProps, afterItem: State): bool = + c.inlineStart = c.lex.curStartPos + case c.lex.cur + of SeqSep: + e = scalarEvent("", defaultProperties, ssPlain, c.lex.curStartPos, c.lex.curStartPos) + c.lex.next() + return true + of nodePropertyKind: + c.levels[^1].state = afterProps + c.levels.add(Level(state: beforeNodeProperties)) + return false + of Plain, SingleQuoted, DoubleQuoted: + c.levels[^1].state = afterProps + return false + of MapKeyInd: + c.levels[^1].state = afterItem + e = startMapEvent(csFlow, defaultProperties, c.lex.curStartPos, c.lex.curEndPos) + c.lex.next() + c.levels.add(Level(state: beforePairValue)) + c.levels.add(Level(state: beforeFlowItem)) + return true + of MapValueInd: + c.levels[^1].state = afterItem + e = startMapEvent(csFlow, defaultProperties, c.lex.curStartPos, c.lex.curEndPos) + c.levels.add(Level(state: atEmptyPairKey)) + return true + else: + if c.lex.cur == endToken: 
+ e = endSeqEvent(c.lex.curStartPos, c.lex.curEndPos) + c.lex.next() + discard c.levels.pop() + return true + else: + c.levels[^1].state = afterItem + c.levels.add(Level(state: beforeFlowItem)) + return false + +proc afterFlowSeqSep(c: Context, e: var Event): bool = + return possibleNextSequenceItem(c, e, Token.SeqEnd, afterFlowSeqSepProps, afterFlowSeqItem) + +proc forcedNextSequenceItem(c: Context, e: var Event): bool = + if c.lex.cur in {Token.Plain, Token.SingleQuoted, Token.DoubleQuoted}: + e = scalarEvent(c.lex.evaluated, c.inlineProps, toStyle(c.lex.cur), c.inlineStart, c.lex.curEndPos) + c.inlineProps = defaultProperties + c.lex.next() + if c.lex.cur == Token.MapValueInd: + c.peek = move(e) + e = startMapEvent(csFlow, defaultProperties, c.lex.curStartPos, c.lex.curStartPos) + c.levels.add(Level(state: afterImplicitPairStart)) + return true + else: + c.levels.add(Level(state: beforeFlowItem)) + return false + +proc afterFlowSeqSepProps(c: Context, e: var Event): bool = + c.levels[^1].state = afterFlowSeqItem + return forcedNextSequenceItem(c, e) + +proc atEmptyPairKey(c: Context, e: var Event): bool = + c.levels[^1].state = beforePairValue + e = scalarEvent("", defaultProperties, ssPlain, c.lex.curStartPos, c.lex.curStartPos) + return true + +proc beforePairValue(c: Context, e: var Event): bool = + if c.lex.cur == Token.MapValueInd: + c.levels[^1].state = afterPairValue + c.levels.add(Level(state: beforeFlowItem)) + c.lex.next() + return false + else: + # pair ends here without value + e = scalarEvent("", defaultProperties, ssPlain, c.lex.curStartPos, c.lex.curEndPos) + discard c.levels.pop() + return true + +proc afterImplicitPairStart(c: Context, e: var Event): bool = + c.lex.next() + c.levels[^1].state = afterPairValue + c.levels.add(Level(state: beforeFLowItem)) + return false + +proc afterPairValue(c: Context, e: var Event): bool = + e = endMapEvent(c.lex.curStartPos, c.lex.curEndPos) + discard c.levels.pop() + return true + +# TODO -------------- + + 
+proc display*(p: YamlParser, event: Event): string = ## Generate a representation of the given event with proper visualization of ## anchor and tag (if any). The generated representation is conformant to the ## format used in the yaml test suite. @@ -1129,6 +947,8 @@ proc display*(p: YamlParser, event: YamlStreamEvent): string ## finishing the parsing of a document, the parser drops all information about ## anchor and tag names. case event.kind + of yamlStartStream: result = "+STR" + of yamlEndStream: result = "-STR" of yamlEndMap: result = "-MAP" of yamlEndSeq: result = "-SEQ" of yamlStartDoc: @@ -1140,12 +960,12 @@ proc display*(p: YamlParser, event: YamlStreamEvent): string when defined(yamlScalarRepInd): if event.explicitDocumentEnd: result &= " ..." of yamlStartMap: - result = "+MAP" & p.renderAttrs(event.mapTag, event.mapAnchor, true) + result = "+MAP" & renderAttrs(event.mapProperties, true) of yamlStartSeq: - result = "+SEQ" & p.renderAttrs(event.seqTag, event.seqAnchor, true) + result = "+SEQ" & renderAttrs(event.seqProperties, true) of yamlScalar: when defined(yamlScalarRepInd): - result = "=VAL" & p.renderAttrs(event.scalarTag, event.scalarAnchor, + result = "=VAL" & renderAttrs(event.scalarProperties, event.scalarRep == srPlain) case event.scalarRep of srPlain: result &= " :" @@ -1154,10 +974,9 @@ proc display*(p: YamlParser, event: YamlStreamEvent): string of srLiteral: result &= " |" of srFolded: result &= " >" else: - let isPlain = event.scalarTag == yTagExclamationmark - result = "=VAL" & p.renderAttrs(event.scalarTag, event.scalarAnchor, - isPlain) + let isPlain = event.scalarProperties.tag == yTagExclamationmark + result = "=VAL" & renderAttrs(event.scalarProperties, isPlain) if isPlain: result &= " :" else: result &= " \"" result &= yamlTestSuiteEscape(event.scalarContent) - of yamlAlias: result = "=ALI *" & p.anchorName(event.aliasTarget) \ No newline at end of file + of yamlAlias: result = "=ALI *" & $event.aliasTarget \ No newline at end of 
file diff --git a/yaml/private/lex.nim b/yaml/private/lex.nim index 69b0b63..3fea24d 100644 --- a/yaml/private/lex.nim +++ b/yaml/private/lex.nim @@ -5,69 +5,60 @@ # distribution, for details about the copyright. import lexbase, streams, strutils, unicode +import ../data when defined(yamlDebug): import terminal export terminal -when defined(yamlScalarRepInd): - type ScalarKind* = enum - skSingleQuoted, skDoubleQuoted, skLiteral, skFolded - type - YamlLexerObj* = object - cur*: LexerToken - curStartPos*, curEndPos*: tuple[line, column: int] + Lexer* = object + cur*: Token + curStartPos*, curEndPos*: Mark # recently read scalar or URI, if any - buf*: string + evaluated*: string # ltIndentation indentation*: int - when defined(yamlScalarRepInd): - # ltQuotedScalar, ltBlockScalarHeader - scalarKind*: ScalarKind # internals source: BaseLexer tokenStart: int flowDepth: int - state, lineStartState, jsonEnablingState: LexerState + state, lineStartState, jsonEnablingState: State c: char seenMultiline: bool # indentation of recently started set of node properties. # necessary for implicit keys with properties. 
propertyIndentation: int - YamlLexer* = ref YamlLexerObj - - YamlLexerError* = object of ValueError + LexerError* = object of ValueError line*, column*: int lineContent*: string - # temporarily missing .raises: [YamlLexerError] + # temporarily missing .raises: [LexerError] # due to https://github.com/nim-lang/Nim/issues/13905 - LexerState = proc(lex: YamlLexer): bool {.locks: 0, gcSafe.} + State = proc(lex: var Lexer): bool {.locks: 0, gcSafe, nimcall.} - LexerToken* = enum - ltYamlDirective, # `%YAML` - ltTagDirective, # `%TAG` - ltUnknownDirective, # any directive but `%YAML` and `%TAG` - ltDirectiveParam, # parameters of %YAML and unknown directives - ltEmptyLine, # for line folding in multiline plain scalars - ltDirectivesEnd, # explicit `---` - ltDocumentEnd, # explicit `...` - ltStreamEnd, # end of input - ltIndentation, # beginning of non-empty line - ltPlainScalar, ltSingleQuotedScalar, ltDoubleQuotedScalar, - ltLiteralScalar, ltFoldedScalar, - ltSeqItemInd, # block sequence item indicator `- ` - ltMapKeyInd, # block mapping key indicator `? ` - ltMapValueInd # block mapping value indicator `: ` - ltMapStart, ltMapEnd, ltSeqStart, ltSeqEnd, ltSeqSep # {}[], - ltTagHandle, # a handle of a tag, e.g. `!!` of `!!str` - ltSuffix, # suffix of a tag shorthand, e.g. `str` of `!!str`. + Token* {.pure.} = enum + YamlDirective, # `%YAML` + TagDirective, # `%TAG` + UnknownDirective, # any directive but `%YAML` and `%TAG` + DirectiveParam, # parameters of %YAML and unknown directives + EmptyLine, # for line folding in multiline plain scalars + DirectivesEnd, # explicit `---` + DocumentEnd, # explicit `...` + StreamEnd, # end of input + Indentation, # beginning of non-empty line + Plain, SingleQuoted, DoubleQuoted, Literal, Folded, + SeqItemInd, # block sequence item indicator `- ` + MapKeyInd, # block mapping key indicator `? ` + MapValueInd # block mapping value indicator `: ` + MapStart, MapEnd, SeqStart, SeqEnd, SeqSep # {}[], + TagHandle, # a handle of a tag, e.g. 
`!!` of `!!str` + Suffix, # suffix of a tag shorthand, e.g. `str` of `!!str`. # also used for the URI of the %TAG directive - ltVerbatimTag, # a verbatim tag, e.g. `!` - ltAnchor, # anchor property of a node, e.g. `&anchor` - ltAlias # alias node, e.g. `*alias` + VerbatimTag, # a verbatim tag, e.g. `!` + Anchor, # anchor property of a node, e.g. `&anchor` + Alias # alias node, e.g. `*alias` ChompType* = enum ctKeep, ctClip, ctStrip @@ -91,7 +82,9 @@ const tagShorthandChars = {'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-'} suffixChars = {'a' .. 'z', 'A' .. 'Z', '0' .. '9', '#', ';', '/', '?', '@', '&', '=', '+', '$', '_', '.', '!', '~', '*', '\'', '-'} - nodePropertyKind = {ltTagHandle, ltVerbatimTag, ltAnchor} + nodePropertyKind* = {Token.TagHandle, Token.VerbatimTag, Token.Anchor} + scalarTokenKind* = {Token.Plain, Token.SingleQuoted, Token.DoubleQuoted, + Token.Literal, Token.Folded} UTF8NextLine = toUTF8(0x85.Rune) UTF8NonBreakingSpace = toUTF8(0xA0.Rune) @@ -102,11 +95,11 @@ const # lexer source handling -proc advance(lex: YamlLexer, step: int = 1) {.inline.} = +proc advance(lex: var Lexer, step: int = 1) {.inline.} = lex.source.bufpos.inc(step) lex.c = lex.source.buf[lex.source.bufpos] -template lexCR(lex: YamlLexer) = +template lexCR(lex: var Lexer) = try: lex.source.bufpos = lex.source.handleCR(lex.source.bufpos) except: var e = lex.generateError("Encountered stream error: " & @@ -115,7 +108,7 @@ template lexCR(lex: YamlLexer) = raise e lex.c = lex.source.buf[lex.source.bufpos] -template lexLF(lex: YamlLexer) = +template lexLF(lex: var Lexer) = try: lex.source.bufpos = lex.source.handleLF(lex.source.bufpos) except: var e = generateError(lex, "Encountered stream error: " & @@ -124,22 +117,22 @@ template lexLF(lex: YamlLexer) = raise e lex.c = lex.source.buf[lex.source.bufpos] -template lineNumber(lex: YamlLexer): int = +template lineNumber(lex: Lexer): Positive = lex.source.lineNumber -template columnNumber(lex: YamlLexer): int = +template columnNumber(lex: Lexer): 
Positive = lex.source.getColNumber(lex.source.bufpos) + 1 -template currentLine(lex: YamlLexer): string = +template currentLine(lex: Lexer): string = lex.source.getCurrentLine(true) -proc Safe(lex: YamlLexer): bool {.inline.} = +proc isPlainSafe(lex: Lexer): bool {.inline.} = case lex.source.buf[lex.source.bufpos + 1] of spaceOrLineEnd: result = false of flowIndicators: result = lex.flowDepth == 0 else: result = true -proc lineWithMarker(lex: YamlLexer, pos: tuple[line, column: int], +proc lineWithMarker(lex: Lexer, pos: tuple[line, column: int], marker: bool): string = if pos.line == lex.source.lineNumber: result = lex.source.getCurrentLine(false) @@ -150,25 +143,25 @@ proc lineWithMarker(lex: YamlLexer, pos: tuple[line, column: int], {.push gcSafe, locks: 0.} # `raises` cannot be pushed. -proc outsideDoc(lex: YamlLexer): bool {.raises: [].} -proc yamlVersion(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc tagShorthand(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc tagUri(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc unknownDirParams(lex: YamlLexer): bool {.raises: [].} -proc expectLineEnd(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc lineStart(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc flowLineStart(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc flowLineIndentation(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc insideLine(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc indentationSettingToken(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc afterToken(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc beforeIndentationSettingToken(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc afterJsonEnablingToken(lex: YamlLexer): bool {.raises: YamlLexerError.} -proc lineIndentation(lex: YamlLexer): bool {.raises: [].} -proc lineDirEnd(lex: YamlLexer): bool {.raises: [].} -proc lineDocEnd(lex: YamlLexer): bool {.raises: [].} -proc atSuffix(lex: YamlLexer): bool {.raises: [].} -proc 
streamEnd(lex: YamlLexer): bool {.raises: [].} +proc outsideDoc(lex: var Lexer): bool {.raises: [].} +proc yamlVersion(lex: var Lexer): bool {.raises: LexerError.} +proc tagShorthand(lex: var Lexer): bool {.raises: LexerError.} +proc tagUri(lex: var Lexer): bool {.raises: LexerError.} +proc unknownDirParams(lex: var Lexer): bool {.raises: [].} +proc expectLineEnd(lex: var Lexer): bool {.raises: LexerError.} +proc lineStart(lex: var Lexer): bool {.raises: LexerError.} +proc flowLineStart(lex: var Lexer): bool {.raises: LexerError.} +proc flowLineIndentation(lex: var Lexer): bool {.raises: LexerError.} +proc insideLine(lex: var Lexer): bool {.raises: LexerError.} +proc indentationSettingToken(lex: var Lexer): bool {.raises: LexerError.} +proc afterToken(lex: var Lexer): bool {.raises: LexerError.} +proc beforeIndentationSettingToken(lex: var Lexer): bool {.raises: LexerError.} +proc afterJsonEnablingToken(lex: var Lexer): bool {.raises: LexerError.} +proc lineIndentation(lex: var Lexer): bool {.raises: [].} +proc lineDirEnd(lex: var Lexer): bool {.raises: [].} +proc lineDocEnd(lex: var Lexer): bool {.raises: [].} +proc atSuffix(lex: var Lexer): bool {.raises: [].} +proc streamEnd(lex: var Lexer): bool {.raises: [].} {.pop.} # helpers @@ -178,28 +171,28 @@ template debug(message: string) {.dirty.} = try: styledWriteLine(stdout, fgBlue, message) except IOError: discard -proc generateError(lex: YamlLexer, message: string): - ref YamlLexerError {.raises: [].} = - result = newException(YamlLexerError, message) +proc generateError(lex: Lexer, message: string): + ref LexerError {.raises: [].} = + result = newException(LexerError, message) result.line = lex.lineNumber() result.column = lex.columnNumber() result.lineContent = lex.currentLine() -proc startToken(lex: YamlLexer) {.inline.} = - lex.curStartPos = (lex.lineNumber(), lex.columnNumber()) +proc startToken(lex: var Lexer) {.inline.} = + lex.curStartPos = (line: lex.lineNumber(), column: lex.columnNumber()) 
lex.tokenStart = lex.source.bufpos -proc endToken(lex: YamlLexer) {.inline.} = - lex.curEndPos = (lex.lineNumber(), lex.columnNumber()) +proc endToken(lex: var Lexer) {.inline.} = + lex.curEndPos = (line: lex.lineNumber(), column: lex.columnNumber()) -proc readNumericSubtoken(lex: YamlLexer) {.inline.} = +proc readNumericSubtoken(lex: var Lexer) {.inline.} = if lex.c notin digits: raise lex.generateError("Illegal character in YAML version string: " & escape("" & lex.c)) while true: lex.advance() if lex.c notin digits: break -proc isDirectivesEnd(lex: YamlLexer): bool = +proc isDirectivesEnd(lex: var Lexer): bool = var peek = lex.source.bufpos if lex.source.buf[peek] == '-': peek += 1 @@ -211,7 +204,7 @@ proc isDirectivesEnd(lex: YamlLexer): bool = return true return false -proc isDocumentEnd(lex: YamlLexer): bool = +proc isDocumentEnd(lex: var Lexer): bool = var peek = lex.source.bufpos if lex.source.buf[peek] == '.': peek += 1 @@ -223,7 +216,7 @@ proc isDocumentEnd(lex: YamlLexer): bool = return true return false -proc readHexSequence(lex: YamlLexer, len: int) = +proc readHexSequence(lex: var Lexer, len: int) = var charPos = 0 let startPos = lex.source.bufpos for i in countup(0, len-1): @@ -244,10 +237,10 @@ proc readHexSequence(lex: YamlLexer, len: int) = charPos += coeff * (int(lex.c) - int('A') + 10) else: discard # cannot happen, we checked coeff = coeff div 16 - lex.buf.add($Rune(charPos)) + lex.evaluated.add($Rune(charPos)) -proc readURI(lex: YamlLexer) = - lex.buf.setLen(0) +proc readURI(lex: var Lexer) = + lex.evaluated.setLen(0) let endWithSpace = lex.c != '<' let restricted = lex.flowDepth > 0 var literalStart: int @@ -262,17 +255,17 @@ proc readURI(lex: YamlLexer) = case lex.c of spaceOrLineEnd: if endWithSpace: - lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) break raise lex.generateError("Unclosed verbatim tag") of '%': - 
lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.readHexSequence(2) literalStart = lex.source.bufpos of uriChars: discard of '[', ']', ',': if restricted: - lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) break of '!': if restricted: @@ -280,14 +273,14 @@ proc readURI(lex: YamlLexer) = of '>': if endWithSpace: raise lex.generateError("Illegal character in URI: `>`") - lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.advance() break else: raise lex.generateError("Illegal character in URI: " & escape("" & lex.c)) lex.advance() -proc endLine(lex: YamlLexer) = +proc endLine(lex: var Lexer) = while true: case lex.c of '\l': @@ -307,7 +300,7 @@ proc endLine(lex: YamlLexer) = if lex.c in lineEnd: break else: discard -proc startLine(lex: YamlLexer): LineStartType = +proc startLine(lex: var Lexer): LineStartType = case lex.c of '-': return if lex.isDirectivesEnd(): lsDirectivesEndMarker @@ -323,8 +316,8 @@ proc startLine(lex: YamlLexer): LineStartType = of EndOfFile: lsStreamEnd else: lsContent -proc readPlainScalar(lex: YamlLexer) = - lex.buf.setLen(0) +proc readPlainScalar(lex: var Lexer) = + lex.evaluated.setLen(0) let afterNewlineState = if lex.flowDepth == 0: lineIndentation else: flowLineIndentation var lineStartPos: int @@ -333,7 +326,7 @@ proc readPlainScalar(lex: YamlLexer) = if lex.propertyIndentation != -1: lex.indentation = lex.propertyIndentation lex.propertyIndentation = -1 - lex.cur = ltPlainScalar + lex.cur = Token.Plain block multilineLoop: while true: lineStartPos = lex.source.bufpos - 1 @@ -348,48 +341,48 @@ proc readPlainScalar(lex: YamlLexer) = lex.advance() case lex.c of '\l', '\c': - lex.buf.add(lex.source.buf[lineStartPos..contentEnd]) + 
lex.evaluated.add(lex.source.buf[lineStartPos..contentEnd]) break inlineLoop of EndOfFile: - lex.buf.add(lex.source.buf[lineStartPos..contentEnd]) + lex.evaluated.add(lex.source.buf[lineStartPos..contentEnd]) lex.state = streamEnd break multilineLoop of '#': - lex.buf.add(lex.source.buf[lineStartPos..contentEnd]) + lex.evaluated.add(lex.source.buf[lineStartPos..contentEnd]) lex.state = expectLineEnd break multilineLoop of ':': - if not lex.Safe(): - lex.buf.add(lex.source.buf[lineStartPos..contentEnd]) + if not lex.isPlainSafe(): + lex.evaluated.add(lex.source.buf[lineStartPos..contentEnd]) lex.state = insideLine break multilineLoop break spaceLoop of flowIndicators: if lex.flowDepth > 0: - lex.buf.add(lex.source.buf[lineStartPos..contentEnd]) + lex.evaluated.add(lex.source.buf[lineStartPos..contentEnd]) lex.state = insideLine break multilineLoop break spaceLoop of ' ': discard else: break spaceLoop of ':': - if not lex.Safe(): - lex.buf.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) + if not lex.isPlainSafe(): + lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) lex.endToken() lex.state = insideLine break multilineLoop of flowIndicators: if lex.flowDepth > 0: - lex.buf.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) lex.endToken() lex.state = insideLine break multilineLoop of '\l', '\c': - lex.buf.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) lex.endToken() break inlineLoop of EndOfFile: - lex.buf.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[lineStartPos..lex.source.bufpos - 2]) if lex.columnNumber() > 0: lex.endToken() lex.state = streamEnd @@ -419,31 +412,31 @@ proc readPlainScalar(lex: YamlLexer) = break multilineLoop of lsNewline: lex.endLine() newlines += 1 - if (lex.c == ':' and not lex.Safe()) or + if (lex.c == ':' and 
not lex.isPlainSafe()) or lex.c == '#' or (lex.c in flowIndicators and lex.flowDepth > 0): lex.state = afterNewlineState break multilineLoop lex.seenMultiline = true - if newlines == 1: lex.buf.add(' ') + if newlines == 1: lex.evaluated.add(' ') else: - for i in countup(2, newlines): lex.buf.add('\l') + for i in countup(2, newlines): lex.evaluated.add('\l') -proc streamEndAfterBlock(lex: YamlLexer) = +proc streamEndAfterBlock(lex: var Lexer) = if lex.columnNumber() != 0: lex.endToken() lex.curEndPos.column -= 1 -proc readBlockScalar(lex: YamlLexer) = +proc readBlockScalar(lex: var Lexer) = var chomp = ctClip indent = 0 separationLines = 0 contentStart: int lex.startToken() - lex.cur = if lex.c == '>': ltFoldedScalar else: ltLiteralScalar - lex.buf.setLen(0) + lex.cur = if lex.c == '>': Token.Folded else: Token.Literal + lex.evaluated.setLen(0) # header while true: @@ -506,12 +499,12 @@ proc readBlockScalar(lex: YamlLexer) = elif lex.columnNumber < indent: break body break for i in countup(0, separationLines - 1): - lex.buf.add('\l') + lex.evaluated.add('\l') block content: contentStart = lex.source.bufpos - 1 while lex.c notin lineEnd: lex.advance() - lex.buf.add(lex.buf[contentStart .. lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[contentStart .. 
lex.source.bufpos - 2]) separationLines = 0 if lex.c == EndOfFile: lex.state = streamEnd @@ -540,14 +533,14 @@ proc readBlockScalar(lex: YamlLexer) = else: break # line folding - if lex.cur == ltLiteralScalar: + if lex.cur == Token.Literal: for i in countup(0, separationLines - 1): - lex.buf.add('\l') + lex.evaluated.add('\l') elif separationLines == 1: - lex.buf.add(' ') + lex.evaluated.add(' ') else: for i in countup(0, separationLines - 2): - lex.buf.add('\l') + lex.evaluated.add('\l') if lex.columnNumber() > max(0, lex.indentation): if lex.c == '#': @@ -564,13 +557,13 @@ proc readBlockScalar(lex: YamlLexer) = case chomp of ctStrip: discard of ctClip: - if len(lex.buf) > 0: - lex.buf.add('\l') + if len(lex.evaluated) > 0: + lex.evaluated.add('\l') of ctKeep: for i in countup(0, separationLines - 1): - lex.buf.add('\l') + lex.evaluated.add('\l') -proc processQuotedWhitespace(lex: YamlLexer, initial: int) = +proc processQuotedWhitespace(lex: var Lexer, initial: int) = var newlines = initial let firstSpace = lex.source.bufpos - 1 while true: @@ -583,7 +576,7 @@ proc processQuotedWhitespace(lex: YamlLexer, initial: int) = lex.lexCR() break else: - lex.buf.add(lex.source.buf[firstSpace..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[firstSpace..lex.source.bufpos - 2]) return lex.advance() lex.seenMultiline = true @@ -599,14 +592,14 @@ proc processQuotedWhitespace(lex: YamlLexer, initial: int) = raise lex.generateError("Unclosed quoted string") newlines += 1 if newlines == 0: discard - elif newlines == 1: lex.buf.add(' ') + elif newlines == 1: lex.evaluated.add(' ') else: - for i in countup(2, newlines): lex.buf.add('\l') + for i in countup(2, newlines): lex.evaluated.add('\l') -proc readSingleQuotedScalar(lex: YamlLexer) = +proc readSingleQuotedScalar(lex: var Lexer) = lex.seenMultiline = false lex.startToken() - lex.buf.setLen(0) + lex.evaluated.setLen(0) if lex.propertyIndentation != -1: lex.indentation = lex.propertyIndentation lex.propertyIndentation 
= -1 @@ -617,26 +610,26 @@ proc readSingleQuotedScalar(lex: YamlLexer) = of EndOfFile: raise lex.generateError("Unclosed quoted string") of '\'': - lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.advance() if lex.c == '\'': - lex.buf.add('\'') + lex.evaluated.add('\'') literalStart = lex.source.bufpos lex.advance() else: break of ' ', '\l', '\c': - lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.processQuotedWhitespace(1) literalStart = lex.source.bufpos - 1 else: lex.advance() lex.endToken() - lex.cur = ltSingleQuotedScalar + lex.cur = Token.SingleQuoted -proc readDoubleQuotedScalar(lex: YamlLexer) = +proc readDoubleQuotedScalar(lex: var Lexer) = lex.seenMultiline = false lex.startToken() - lex.buf.setLen(0) + lex.evaluated.setLen(0) if lex.propertyIndentation != -1: lex.indentation = lex.propertyIndentation lex.propertyIndentation = -1 @@ -647,27 +640,27 @@ proc readDoubleQuotedScalar(lex: YamlLexer) = of EndOfFile: raise lex.generateError("Unclosed quoted string") of '\\': - lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.advance() literalStart = lex.source.bufpos case lex.c - of '0': lex.buf.add('\0') - of 'a': lex.buf.add('\a') - of 'b': lex.buf.add('\b') - of 't', '\t': lex.buf.add('\t') - of 'n': lex.buf.add('\l') - of 'v': lex.buf.add('\v') - of 'f': lex.buf.add('\f') - of 'r': lex.buf.add('\c') - of 'e': lex.buf.add('\e') - of ' ': lex.buf.add(' ') - of '"': lex.buf.add('"') - of '/': lex.buf.add('/') - of '\\':lex.buf.add('\\') - of 'N': lex.buf.add(UTF8NextLine) - of '_': lex.buf.add(UTF8NonBreakingSpace) - of 'L': lex.buf.add(UTF8LineSeparator) - of 'P': lex.buf.add(UTF8ParagraphSeparator) + of '0': lex.evaluated.add('\0') + of 'a': lex.evaluated.add('\a') + of 'b': 
lex.evaluated.add('\b') + of 't', '\t': lex.evaluated.add('\t') + of 'n': lex.evaluated.add('\l') + of 'v': lex.evaluated.add('\v') + of 'f': lex.evaluated.add('\f') + of 'r': lex.evaluated.add('\c') + of 'e': lex.evaluated.add('\e') + of ' ': lex.evaluated.add(' ') + of '"': lex.evaluated.add('"') + of '/': lex.evaluated.add('/') + of '\\':lex.evaluated.add('\\') + of 'N': lex.evaluated.add(UTF8NextLine) + of '_': lex.evaluated.add(UTF8NonBreakingSpace) + of 'L': lex.evaluated.add(UTF8LineSeparator) + of 'P': lex.evaluated.add(UTF8ParagraphSeparator) of 'x': lex.readHexSequence(2) literalStart = lex.source.bufpos @@ -684,10 +677,10 @@ proc readDoubleQuotedScalar(lex: YamlLexer) = else: raise lex.generateError("Illegal character in escape sequence: " & escape("" & lex.c)) of '"': - lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) break of ' ', '\l', '\c': - lex.buf.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) + lex.evaluated.add(lex.source.buf[literalStart..lex.source.bufpos - 2]) lex.processQuotedWhitespace(1) literalStart = lex.source.bufpos - 1 continue @@ -695,46 +688,49 @@ proc readDoubleQuotedScalar(lex: YamlLexer) = lex.advance() lex.advance() lex.endToken() - lex.cur = ltDoubleQuotedScalar + lex.cur = Token.DoubleQuoted -proc basicInit(lex: YamlLexer) = +proc basicInit(lex: var Lexer) = lex.state = outsideDoc lex.flowDepth = 0 lex.lineStartState = outsideDoc lex.jsonEnablingState = afterToken lex.propertyIndentation = -1 - lex.buf = "" + lex.evaluated = "" lex.advance() # interface -proc shortLexeme*(lex: YamlLexer): string = +proc lastScalarWasMultiline*(lex: Lexer): bool = + result = lex.seenMultiline + +proc shortLexeme*(lex: Lexer): string = return lex.source.buf[lex.tokenStart..lex.source.bufpos-2] -proc fullLexeme*(lex: YamlLexer): string = +proc fullLexeme*(lex: Lexer): string = return lex.source.buf[lex.tokenStart - 1..lex.source.bufpos-2] -proc 
next*(lex: YamlLexer) = +proc currentLine*(lex: Lexer): string = + return lex.source.getCurrentLine(false) + +proc next*(lex: var Lexer) = while not lex.state(lex): discard debug("lexer -> " & $lex.cur) -proc newYamlLexer*(source: Stream): YamlLexer {.raises: [IOError, OSError].} = - result = new(YamlLexerObj) - result.source.open(source) - result.basicInit() +proc init*(lex: var Lexer, source: Stream) {.raises: [IOError, OSError].} = + lex.source.open(source) + lex.basicInit() -proc newYamlLexer*(source: string): YamlLexer - {.raises: [].} = - result = new(YamlLexerObj) +proc init*(lex: var Lexer, source: string) {.raises: [].} = try: - result.source.open(newStringStream(source)) + lex.source.open(newStringStream(source)) except: discard # can never happen with StringStream - result.basicInit() + lex.basicInit() # states -proc outsideDoc(lex: YamlLexer): bool = +proc outsideDoc(lex: var Lexer): bool = case lex.c of '%': lex.startToken() @@ -746,23 +742,23 @@ proc outsideDoc(lex: YamlLexer): bool = case name of "YAML": lex.state = yamlVersion - lex.cur = ltYamlDirective + lex.cur = Token.YamlDirective of "TAG": lex.state = tagShorthand - lex.cur = ltTagDirective + lex.cur = Token.TagDirective else: lex.state = unknownDirParams - lex.cur = ltUnknownDirective - lex.buf.setLen(0) - lex.buf.add(name) + lex.cur = Token.UnknownDirective + lex.evaluated.setLen(0) + lex.evaluated.add(name) of '-': lex.startToken() if lex.isDirectivesEnd(): lex.state = expectLineEnd - lex.cur = ltDocumentEnd + lex.cur = Token.DocumentEnd else: lex.state = indentationSettingToken - lex.cur = ltIndentation + lex.cur = Token.Indentation lex.lineStartState = lineStart lex.indentation = -1 lex.endToken() @@ -770,12 +766,12 @@ proc outsideDoc(lex: YamlLexer): bool = lex.startToken() if lex.isDocumentEnd(): lex.state = expectLineEnd - lex.cur = ltDocumentEnd + lex.cur = Token.DocumentEnd else: lex.state = indentationSettingToken lex.lineStartState = lineStart lex.indentation = -1 - lex.cur = 
ltIndentation + lex.cur = Token.Indentation lex.endToken() else: lex.startToken() @@ -784,12 +780,12 @@ proc outsideDoc(lex: YamlLexer): bool = lex.state = expectLineEnd return false lex.endToken() - lex.cur = ltIndentation + lex.cur = Token.Indentation lex.state = indentationSettingToken lex.lineStartState = lineStart return true -proc yamlVersion(lex: YamlLexer): bool = +proc yamlVersion(lex: var Lexer): bool = debug("lex: yamlVersion") while lex.c in space: lex.advance() lex.startToken() @@ -800,11 +796,11 @@ proc yamlVersion(lex: YamlLexer): bool = lex.readNumericSubtoken() if lex.c notin spaceOrLineEnd: raise lex.generateError("Illegal character in YAML version string: " & escape("" & lex.c)) - lex.cur = ltDirectiveParam + lex.cur = Token.DirectiveParam lex.endToken() lex.state = expectLineEnd -proc tagShorthand(lex: YamlLexer): bool = +proc tagShorthand(lex: var Lexer): bool = debug("lex: tagShorthand") while lex.c in space: lex.advance() if lex.c != '!': @@ -823,23 +819,23 @@ proc tagShorthand(lex: YamlLexer): bool = lex.advance() if lex.c notin spaceOrLineEnd: raise lex.generateError("Missing space after tag shorthand") - lex.cur = ltTagHandle + lex.cur = Token.TagHandle lex.endToken() lex.state = tagUri -proc tagUri(lex: YamlLexer): bool = +proc tagUri(lex: var Lexer): bool = debug("lex: tagUri") while lex.c in space: lex.advance() lex.startToken() if lex.c == '<': raise lex.generateError("Illegal character in tag URI: " & escape("" & lex.c)) lex.readUri() - lex.cur = ltSuffix + lex.cur = Token.Suffix lex.endToken() lex.state = expectLineEnd return true -proc unknownDirParams(lex: YamlLexer): bool = +proc unknownDirParams(lex: var Lexer): bool = debug("lex: unknownDirParams") while lex.c in space: lex.advance() if lex.c in lineEnd + {'#'}: @@ -849,10 +845,10 @@ proc unknownDirParams(lex: YamlLexer): bool = while true: lex.advance() if lex.c in lineEnd + {'#'}: break - lex.cur = ltDirectiveParam + lex.cur = Token.DirectiveParam return true -proc 
expectLineEnd(lex: YamlLexer): bool = +proc expectLineEnd(lex: var Lexer): bool = debug("lex: expectLineEnd") while lex.c in space: lex.advance() if lex.c notin commentOrLineEnd: @@ -860,7 +856,7 @@ proc expectLineEnd(lex: YamlLexer): bool = lex.endLine() return false -proc lineStart(lex: YamlLexer): bool = +proc lineStart(lex: var Lexer): bool = debug("lex: lineStart") return case lex.startLine() of lsDirectivesEndMarker: lex.lineDirEnd() @@ -869,7 +865,7 @@ proc lineStart(lex: YamlLexer): bool = of lsStreamEnd: lex.state = streamEnd; false of lsContent: lex.lineIndentation() -proc flowLineStart(lex: YamlLexer): bool = +proc flowLineStart(lex: var Lexer): bool = var indent: int case lex.c of '-': @@ -889,14 +885,14 @@ proc flowLineStart(lex: YamlLexer): bool = lex.state = insideLine return false -proc flowLineIndentation(lex: YamlLexer): bool = +proc flowLineIndentation(lex: var Lexer): bool = if lex.columnNumber() < lex.indentation: raise lex.generateError("Too few indentation spaces (must surpass surrounding block level)") lex.state = insideLine return false -proc checkIndicatorChar(lex: YamlLexer, kind: LexerToken) = - if lex.Safe(): +proc checkIndicatorChar(lex: var Lexer, kind: Token) = + if lex.isPlainSafe(): lex.readPlainScalar() else: lex.startToken() @@ -905,7 +901,7 @@ proc checkIndicatorChar(lex: YamlLexer, kind: LexerToken) = lex.cur = kind lex.state = beforeIndentationSettingToken -proc enterFlowCollection(lex: YamlLexer, kind: LexerToken) = +proc enterFlowCollection(lex: var Lexer, kind: Token) = lex.startToken() if lex.flowDepth == 0: lex.jsonEnablingState = afterJsonEnablingToken @@ -917,7 +913,7 @@ proc enterFlowCollection(lex: YamlLexer, kind: LexerToken) = lex.endToken() lex.cur = kind -proc leaveFlowCollection(lex: YamlLexer, kind: LexerToken) = +proc leaveFlowCollection(lex: var Lexer, kind: Token) = lex.startToken() if lex.flowDepth == 0: raise lex.generateError("No flow collection to leave!") @@ -930,13 +926,13 @@ proc 
leaveFlowCollection(lex: YamlLexer, kind: LexerToken) = lex.endToken() lex.cur = kind -proc readNamespace(lex: YamlLexer) = +proc readNamespace(lex: var Lexer) = lex.startToken() lex.advance() if lex.c == '<': lex.readURI() lex.endToken() - lex.cur = ltVerbatimTag + lex.cur = Token.VerbatimTag else: var handleEnd = lex.tokenStart while true: @@ -956,10 +952,10 @@ proc readNamespace(lex: YamlLexer) = raise lex.generateError("Illegal character in tag handle: " & escape("" & lex.c)) lex.advance() lex.endToken() - lex.cur = ltTagHandle + lex.cur = Token.TagHandle lex.state = atSuffix -proc readAnchorName(lex: YamlLexer) = +proc readAnchorName(lex: var Lexer) = lex.startToken() while true: lex.advance() @@ -970,17 +966,17 @@ proc readAnchorName(lex: YamlLexer) = raise lex.generateError("Anchor name must not be empty") lex.state = afterToken -proc insideLine(lex: YamlLexer): bool = +proc insideLine(lex: var Lexer): bool = case lex.c of ':': - lex.checkIndicatorChar(ltMapValueInd) - if lex.cur == ltMapValueInd and lex.propertyIndentation != -1: + lex.checkIndicatorChar(Token.MapValueInd) + if lex.cur == Token.MapValueInd and lex.propertyIndentation != -1: lex.indentation = lex.propertyIndentation lex.propertyIndentation = -1 of '?': - lex.checkIndicatorChar(ltMapKeyInd) + lex.checkIndicatorChar(Token.MapKeyInd) of '-': - lex.checkIndicatorChar(ltSeqItemInd) + lex.checkIndicatorChar(Token.SeqItemInd) of commentOrLineEnd: lex.endLine() return false @@ -996,36 +992,36 @@ proc insideLine(lex: YamlLexer): bool = else: lex.readBlockScalar() of '{': - lex.enterFlowCollection(ltMapStart) + lex.enterFlowCollection(Token.MapStart) of '}': - lex.leaveFlowCollection(ltMapEnd) + lex.leaveFlowCollection(Token.MapEnd) of '[': - lex.enterFlowCollection(ltSeqStart) + lex.enterFlowCollection(Token.SeqStart) of ']': - lex.leaveFlowCollection(ltSeqEnd) + lex.leaveFlowCollection(Token.SeqEnd) of ',': lex.startToken() lex.advance() lex.endToken() - lex.cur = ltSeqSep + lex.cur = Token.SeqSep 
lex.state = afterToken of '!': lex.readNamespace() of '&': lex.readAnchorName() lex.endToken() - lex.cur = ltAnchor + lex.cur = Token.Anchor of '*': lex.readAnchorName() lex.endToken() - lex.cur = ltAlias + lex.cur = Token.Alias of '@', '`': raise lex.generateError("Reserved character may not start any token") else: lex.readPlainScalar() return true -proc indentationSettingToken(lex: YamlLexer): bool = +proc indentationSettingToken(lex: var Lexer): bool = let cachedIntentation = lex.columnNumber() result = lex.insideLine() if result and lex.flowDepth > 0: @@ -1034,7 +1030,7 @@ proc indentationSettingToken(lex: YamlLexer): bool = else: lex.indentation = cachedIntentation -proc afterToken(lex: YamlLexer): bool = +proc afterToken(lex: var Lexer): bool = while lex.c == ' ': lex.advance() if lex.c in commentOrLineEnd: lex.endLine() @@ -1042,13 +1038,13 @@ proc afterToken(lex: YamlLexer): bool = lex.state = insideLine return false -proc beforeIndentationSettingToken(lex: YamlLexer): bool = +proc beforeIndentationSettingToken(lex: var Lexer): bool = discard lex.afterToken() if lex.state == insideLine: lex.state = indentationSettingToken return false -proc afterJsonEnablingToken(lex: YamlLexer): bool = +proc afterJsonEnablingToken(lex: var Lexer): bool = while lex.c == ' ': lex.advance() while true: case lex.c @@ -1056,7 +1052,7 @@ proc afterJsonEnablingToken(lex: YamlLexer): bool = lex.startToken() lex.advance() lex.endToken() - lex.cur = ltMapValueInd + lex.cur = Token.MapValueInd lex.state = afterToken of '#', '\l', '\c': lex.endLine() @@ -1068,43 +1064,43 @@ proc afterJsonEnablingToken(lex: YamlLexer): bool = lex.state = insideLine return false -proc lineIndentation(lex: YamlLexer): bool = +proc lineIndentation(lex: var Lexer): bool = lex.curStartPos.line = lex.source.lineNumber lex.curStartPos.column = 1 lex.endToken() - lex.cur = ltIndentation + lex.cur = Token.Indentation lex.state = indentationSettingToken return true -proc lineDirEnd(lex: YamlLexer): bool = +proc 
lineDirEnd(lex: var Lexer): bool = lex.curStartPos.line = lex.source.lineNumber lex.curStartPos.column = 1 lex.endToken() - lex.cur = ltDirectivesEnd + lex.cur = Token.DirectivesEnd lex.indentation = -1 lex.propertyIndentation = -1 return true -proc lineDocEnd(lex: YamlLexer): bool = +proc lineDocEnd(lex: var Lexer): bool = lex.curStartPos.line = lex.source.lineNumber lex.curStartPos.column = 1 lex.endToken() - lex.cur = ltDocumentEnd + lex.cur = Token.DocumentEnd lex.state = expectLineEnd lex.lineStartState = outsideDoc return true -proc atSuffix(lex: YamlLexer): bool = +proc atSuffix(lex: var Lexer): bool = lex.startToken() while lex.c in suffixChars: lex.advance() - lex.buf = lex.fullLexeme() + lex.evaluated = lex.fullLexeme() lex.endToken() - lex.cur = ltSuffix + lex.cur = Token.Suffix lex.state = afterToken return true -proc streamEnd(lex: YamlLexer): bool = +proc streamEnd(lex: var Lexer): bool = lex.startToken() lex.endToken() - lex.cur = ltStreamEnd + lex.cur = Token.StreamEnd return true \ No newline at end of file diff --git a/yaml/stream.nim b/yaml/stream.nim index 3d895e3..9ddf6e8 100644 --- a/yaml/stream.nim +++ b/yaml/stream.nim @@ -12,66 +12,12 @@ ## operate. It is not named ``streams`` to not confuse it with the modle in the ## stdlib with that name. -import hashes -import private/internal, taglib +import data when defined(nimNoNil): {.experimental: "notnil".} -when defined(yamlScalarRepInd): - type ScalarRepresentationIndicator* = enum - srPlain, srSingleQuoted, srDoubleQuoted, srLiteral, srFolded - type - AnchorId* = distinct int ## \ - ## An ``AnchorId`` identifies an anchor in the current document. It - ## becomes invalid as soon as the current document scope is invalidated - ## (for example, because the parser yielded a ``yamlEndDocument`` - ## event). ``AnchorId`` s exists because of efficiency, much like - ## ``TagId`` s. The actual anchor name is a presentation detail and - ## cannot be queried by the user. 
- - YamlStreamEventKind* = enum - ## Kinds of YAML events that may occur in an ``YamlStream``. Event kinds - ## are discussed in `YamlStreamEvent <#YamlStreamEvent>`_. - yamlStartDoc, yamlEndDoc, yamlStartMap, yamlEndMap, - yamlStartSeq, yamlEndSeq, yamlScalar, yamlAlias - - YamlStreamEvent* = object - ## An element from a `YamlStream <#YamlStream>`_. Events that start an - ## object (``yamlStartMap``, ``yamlStartSeq``, ``yamlScalar``) have - ## an optional anchor and a tag associated with them. The anchor will be - ## set to ``yAnchorNone`` if it doesn't exist. - ## - ## A non-existing tag in the YAML character stream will be resolved to - ## the non-specific tags ``?`` or ``!`` according to the YAML - ## specification. These are by convention mapped to the ``TagId`` s - ## ``yTagQuestionMark`` and ``yTagExclamationMark`` respectively. - ## Mapping is done by a `TagLibrary <#TagLibrary>`_. - case kind*: YamlStreamEventKind - of yamlStartMap: - mapAnchor* : AnchorId - mapTag* : TagId - of yamlStartSeq: - seqAnchor* : AnchorId - seqTag* : TagId - of yamlScalar: - scalarAnchor* : AnchorId - scalarTag* : TagId - scalarContent*: string # may not be nil (but empty) - when defined(yamlScalarRepInd): - scalarRep* : ScalarRepresentationIndicator - of yamlStartDoc: - when defined(yamlScalarRepInd): - explicitDirectivesEnd*: bool - else: discard - of yamlEndDoc: - when defined(yamlScalarRepInd): - explicitDocumentEnd*: bool - of yamlEndMap, yamlEndSeq: discard - of yamlAlias: - aliasTarget* : AnchorId - YamlStream* = ref object of RootObj ## \ ## A ``YamlStream`` is an iterator-like object that yields a ## well-formed stream of ``YamlStreamEvents``. Well-formed means that @@ -85,27 +31,18 @@ type ## and is not required to check for it. The procs in this module will ## always yield a well-formed ``YamlStream`` and expect it to be ## well-formed if they take it as input parameter. 
- nextImpl*: proc(s: YamlStream, e: var YamlStreamEvent): bool + nextImpl*: proc(s: YamlStream, e: var Event): bool lastTokenContextImpl*: proc(s: YamlStream, line, column: var int, lineContent: var string): bool {.raises: [].} - isFinished*: bool peeked: bool - cached: YamlStreamEvent + cached: Event - YamlStreamError* = object of Exception + YamlStreamError* = object of ValueError ## Exception that may be raised by a ``YamlStream`` when the underlying ## backend raises an exception. The error that has occurred is ## available from ``parent``. -const - yAnchorNone*: AnchorId = (-1).AnchorId ## \ - ## yielded when no anchor was defined for a YAML node - -proc `==`*(left, right: AnchorId): bool {.borrow.} -proc `$`*(id: AnchorId): string {.borrow.} -proc hash*(id: AnchorId): Hash {.borrow.} - proc noLastContext(s: YamlStream, line, column: var int, lineContent: var string): bool {.raises: [].} = (line, column, lineContent) = (-1, -1, "") @@ -117,111 +54,73 @@ proc basicInit*(s: YamlStream, lastTokenContextImpl: ## initialize basic values of the YamlStream. Call this in your constructor ## if you subclass YamlStream. s.peeked = false - s.isFinished = false s.lastTokenContextImpl = lastTokenContextImpl when not defined(JS): type IteratorYamlStream = ref object of YamlStream - backend: iterator(): YamlStreamEvent + backend: iterator(): Event - proc initYamlStream*(backend: iterator(): YamlStreamEvent): YamlStream + proc initYamlStream*(backend: iterator(): Event): YamlStream {.raises: [].} = ## Creates a new ``YamlStream`` that uses the given iterator as backend. 
result = new(IteratorYamlStream) result.basicInit() IteratorYamlStream(result).backend = backend - result.nextImpl = proc(s: YamlStream, e: var YamlStreamEvent): bool = + result.nextImpl = proc(s: YamlStream, e: var Event): bool = e = IteratorYamlStream(s).backend() - if finished(IteratorYamlStream(s).backend): - s.isFinished = true - result = false - else: result = true + result = true type BufferYamlStream* = ref object of YamlStream pos: int - buf: seq[YamlStreamEvent] + buf: seq[Event] proc newBufferYamlStream*(): BufferYamlStream not nil = result = cast[BufferYamlStream not nil](new(BufferYamlStream)) result.basicInit() result.buf = @[] result.pos = 0 - result.nextImpl = proc(s: YamlStream, e: var YamlStreamEvent): bool = + result.nextImpl = proc(s: YamlStream, e: var Event): bool = let bys = BufferYamlStream(s) - if bys.pos == bys.buf.len: - result = false - s.isFinished = true - else: - e = bys.buf[bys.pos] - inc(bys.pos) - result = true + e = bys.buf[bys.pos] + inc(bys.pos) + result = true -proc put*(bys: BufferYamlStream, e: YamlStreamEvent) {.raises: [].} = +proc put*(bys: BufferYamlStream, e: Event) {.raises: [].} = bys.buf.add(e) -proc next*(s: YamlStream): YamlStreamEvent {.raises: [YamlStreamError].} = +proc next*(s: YamlStream): Event {.raises: [YamlStreamError].} = ## Get the next item of the stream. Requires ``finished(s) == true``. ## If the backend yields an exception, that exception will be encapsulated ## into a ``YamlStreamError``, which will be raised. 
if s.peeked: s.peeked = false - shallowCopy(result, s.cached) - return + return move(s.cached) else: - yAssert(not s.isFinished) try: while true: if s.nextImpl(s, result): break - yAssert(not s.isFinished) - except YamlStreamError: - let cur = getCurrentException() - var e = newException(YamlStreamError, cur.msg) - e.parent = cur.parent - raise e except Exception: let cur = getCurrentException() var e = newException(YamlStreamError, cur.msg) e.parent = cur raise e -proc peek*(s: YamlStream): YamlStreamEvent {.raises: [YamlStreamError].} = +proc peek*(s: YamlStream): Event {.raises: [YamlStreamError].} = ## Get the next item of the stream without advancing the stream. ## Requires ``finished(s) == true``. Handles exceptions of the backend like ## ``next()``. if not s.peeked: - shallowCopy(s.cached, s.next()) + s.cached = s.next() s.peeked = true shallowCopy(result, s.cached) -proc `peek=`*(s: YamlStream, value: YamlStreamEvent) {.raises: [].} = +proc `peek=`*(s: YamlStream, value: Event) {.raises: [].} = ## Set the next item of the stream. Will replace a previously peeked item, ## if one exists. s.cached = value s.peeked = true -proc finished*(s: YamlStream): bool {.raises: [YamlStreamError].} = - ## ``true`` if no more items are available in the stream. Handles exceptions - ## of the backend like ``next()``. 
- if s.peeked: result = false - else: - try: - while true: - if s.isFinished: return true - if s.nextImpl(s, s.cached): - s.peeked = true - return false - except YamlStreamError: - let cur = getCurrentException() - var e = newException(YamlStreamError, cur.msg) - e.parent = cur.parent - raise e - except Exception: - let cur = getCurrentException() - var e = newException(YamlStreamError, cur.msg) - e.parent = cur - raise e - proc getLastTokenContext*(s: YamlStream, line, column: var int, lineContent: var string): bool = ## ``true`` if source context information is available about the last returned @@ -229,139 +128,17 @@ proc getLastTokenContext*(s: YamlStream, line, column: var int, ## line content where the last token has been read from. result = s.lastTokenContextImpl(s, line, column, lineContent) -iterator items*(s: YamlStream): YamlStreamEvent +iterator items*(s: YamlStream): Event {.raises: [YamlStreamError].} = ## Iterate over all items of the stream. You may not use ``peek()`` on the ## stream while iterating. - while not s.finished(): yield s.next() + while true: + let e = s.next() + var last = e.kind == yamlEndStream + yield e + if last: break -iterator mitems*(bys: BufferYamlStream): var YamlStreamEvent {.raises: [].} = +iterator mitems*(bys: BufferYamlStream): var Event {.raises: [].} = ## Iterate over all items of the stream. You may not use ``peek()`` on the ## stream while iterating. 
for e in bys.buf.mitems(): yield e - -proc `==`*(left: YamlStreamEvent, right: YamlStreamEvent): bool {.raises: [].} = - ## compares all existing fields of the given items - if left.kind != right.kind: return false - case left.kind - of yamlStartDoc, yamlEndDoc, yamlEndMap, yamlEndSeq: result = true - of yamlStartMap: - result = left.mapAnchor == right.mapAnchor and left.mapTag == right.mapTag - of yamlStartSeq: - result = left.seqAnchor == right.seqAnchor and left.seqTag == right.seqTag - of yamlScalar: - result = left.scalarAnchor == right.scalarAnchor and - left.scalarTag == right.scalarTag and - left.scalarContent == right.scalarContent - of yamlAlias: result = left.aliasTarget == right.aliasTarget - -proc renderAttrs(tag: TagId, anchor: AnchorId, isPlain: bool = true): string = - result = "" - if anchor != yAnchorNone: result &= " &" & $anchor - case tag - of yTagQuestionmark: discard - of yTagExclamationmark: - when defined(yamlScalarRepInd): - if isPlain: result &= " " - else: - result &= " <" & $tag & ">" - -proc `$`*(event: YamlStreamEvent): string {.raises: [].} = - ## outputs a human-readable string describing the given event. - ## This string is compatible to the format used in the yaml test suite. - case event.kind - of yamlEndMap: result = "-MAP" - of yamlEndSeq: result = "-SEQ" - of yamlStartDoc: - result = "+DOC" - when defined(yamlScalarRepInd): - if event.explicitDirectivesEnd: result &= " ---" - of yamlEndDoc: - result = "-DOC" - when defined(yamlScalarRepInd): - if event.explicitDocumentEnd: result &= " ..." 
- of yamlStartMap: result = "+MAP" & renderAttrs(event.mapTag, event.mapAnchor) - of yamlStartSeq: result = "+SEQ" & renderAttrs(event.seqTag, event.seqAnchor) - of yamlScalar: - when defined(yamlScalarRepInd): - result = "=VAL" & renderAttrs(event.scalarTag, event.scalarAnchor, - event.scalarRep == srPlain) - case event.scalarRep - of srPlain: result &= " :" - of srSingleQuoted: result &= " \'" - of srDoubleQuoted: result &= " \"" - of srLiteral: result &= " |" - of srFolded: result &= " >" - else: - result = "=VAL" & renderAttrs(event.scalarTag, event.scalarAnchor, - false) - if event.scalarTag == yTagExclamationmark: result &= " \"" - else: result &= " :" - result &= yamlTestSuiteEscape(event.scalarContent) - of yamlAlias: result = "=ALI *" & $event.aliasTarget - -proc tag*(event: YamlStreamEvent): TagId {.raises: [FieldError].} = - ## returns the tag of the given event - case event.kind - of yamlStartMap: result = event.mapTag - of yamlStartSeq: result = event.seqTag - of yamlScalar: result = event.scalarTag - else: raise newException(FieldError, "Event " & $event.kind & " has no tag") - -when defined(yamlScalarRepInd): - proc startDocEvent*(explicit: bool = false): YamlStreamEvent - {.inline, raises: [].} = - ## creates a new event that marks the start of a YAML document - result = YamlStreamEvent(kind: yamlStartDoc, - explicitDirectivesEnd: explicit) - - proc endDocEvent*(explicit: bool = false): YamlStreamEvent - {.inline, raises: [].} = - ## creates a new event that marks the end of a YAML document - result = YamlStreamEvent(kind: yamlEndDoc, explicitDocumentEnd: explicit) -else: - proc startDocEvent*(): YamlStreamEvent {.inline, raises: [].} = - ## creates a new event that marks the start of a YAML document - result = YamlStreamEvent(kind: yamlStartDoc) - - proc endDocEvent*(): YamlStreamEvent {.inline, raises: [].} = - ## creates a new event that marks the end of a YAML document - result = YamlStreamEvent(kind: yamlEndDoc) - -proc startMapEvent*(tag: 
TagId = yTagQuestionMark, - anchor: AnchorId = yAnchorNone): YamlStreamEvent {.inline, raises: [].} = - ## creates a new event that marks the start of a YAML mapping - result = YamlStreamEvent(kind: yamlStartMap, mapTag: tag, mapAnchor: anchor) - -proc endMapEvent*(): YamlStreamEvent {.inline, raises: [].} = - ## creates a new event that marks the end of a YAML mapping - result = YamlStreamEvent(kind: yamlEndMap) - -proc startSeqEvent*(tag: TagId = yTagQuestionMark, - anchor: AnchorId = yAnchorNone): YamlStreamEvent {.inline, raises: [].} = - ## creates a new event that marks the beginning of a YAML sequence - result = YamlStreamEvent(kind: yamlStartSeq, seqTag: tag, seqAnchor: anchor) - -proc endSeqEvent*(): YamlStreamEvent {.inline, raises: [].} = - ## creates a new event that marks the end of a YAML sequence - result = YamlStreamEvent(kind: yamlEndSeq) - -when defined(yamlScalarRepInd): - proc scalarEvent*(content: string = "", tag: TagId = yTagQuestionMark, - anchor: AnchorId = yAnchorNone, - scalarRep: ScalarRepresentationIndicator = srPlain): - YamlStreamEvent {.inline, raises: [].} = - ## creates a new event that represents a YAML scalar - result = YamlStreamEvent(kind: yamlScalar, scalarTag: tag, - scalarAnchor: anchor, scalarContent: content, - scalarRep: scalarRep) -else: - proc scalarEvent*(content: string = "", tag: TagId = yTagQuestionMark, - anchor: AnchorId = yAnchorNone): YamlStreamEvent {.inline, raises: [].} = - ## creates a new event that represents a YAML scalar - result = YamlStreamEvent(kind: yamlScalar, scalarTag: tag, - scalarAnchor: anchor, scalarContent: content) - -proc aliasEvent*(anchor: AnchorId): YamlStreamEvent {.inline, raises: [].} = - ## creates a new event that represents a YAML alias - result = YamlStreamEvent(kind: yamlAlias, aliasTarget: anchor) diff --git a/yaml/taglib.nim b/yaml/taglib.nim index 646e77e..5e11899 100644 --- a/yaml/taglib.nim +++ b/yaml/taglib.nim @@ -13,20 +13,9 @@ ## the serialization API. 
import tables, macros, hashes, strutils +import data type - TagId* = distinct int ## \ - ## A ``TagId`` identifies a tag URI, like for example - ## ``"tag:yaml.org,2002:str"``. The URI corresponding to a ``TagId`` can - ## be queried from the `TagLibrary <#TagLibrary>`_ which was - ## used to create this ``TagId``; e.g. when you parse a YAML character - ## stream, the ``TagLibrary`` of the parser is the one which generates - ## the resulting ``TagId`` s. - ## - ## URI strings are mapped to ``TagId`` s for efficiency reasons (you - ## do not need to compare strings every time) and to be able to - ## discover unknown tag URIs early in the parsing process. - TagLibrary* = ref object ## A ``TagLibrary`` maps tag URIs to ``TagId`` s. ## @@ -43,87 +32,6 @@ type nextCustomTagId*: TagId tagHandles: Table[string, string] -const - # failsafe schema - - yTagExclamationMark*: TagId = 0.TagId ## ``!`` non-specific tag - yTagQuestionMark* : TagId = 1.TagId ## ``?`` non-specific tag - yTagString* : TagId = 2.TagId ## \ - ## `!!str `_ tag - yTagSequence* : TagId = 3.TagId ## \ - ## `!!seq `_ tag - yTagMapping* : TagId = 4.TagId ## \ - ## `!!map `_ tag - - # json & core schema - - yTagNull* : TagId = 5.TagId ## \ - ## `!!null `_ tag - yTagBoolean* : TagId = 6.TagId ## \ - ## `!!bool `_ tag - yTagInteger* : TagId = 7.TagId ## \ - ## `!!int `_ tag - yTagFloat* : TagId = 8.TagId ## \ - ## `!!float `_ tag - - # other language-independent YAML types (from http://yaml.org/type/ ) - - yTagOrderedMap* : TagId = 9.TagId ## \ - ## `!!omap `_ tag - yTagPairs* : TagId = 10.TagId ## \ - ## `!!pairs `_ tag - yTagSet* : TagId = 11.TagId ## \ - ## `!!set `_ tag - yTagBinary* : TagId = 12.TagId ## \ - ## `!!binary `_ tag - yTagMerge* : TagId = 13.TagId ## \ - ## `!!merge `_ tag - yTagTimestamp* : TagId = 14.TagId ## \ - ## `!!timestamp `_ tag - yTagValue* : TagId = 15.TagId ## \ - ## `!!value `_ tag - yTagYaml* : TagId = 16.TagId ## \ - ## `!!yaml `_ tag - - yTagNimField* : TagId = 100.TagId ## \ - 
## This tag is used in serialization for the name of a field of an - ## object. It may contain any string scalar that is a valid Nim symbol. - - yFirstStaticTagId* : TagId = 1000.TagId ## \ - ## The first ``TagId`` assigned by the ``setTagId`` templates. - - yFirstCustomTagId* : TagId = 10000.TagId ## \ - ## The first ``TagId`` which should be assigned to an URI that does not - ## exist in the ``YamlTagLibrary`` which is used for parsing. - - yamlTagRepositoryPrefix* = "tag:yaml.org,2002:" - nimyamlTagRepositoryPrefix* = "tag:nimyaml.org,2016:" - -proc `==`*(left, right: TagId): bool {.borrow.} -proc hash*(id: TagId): Hash {.borrow.} - -proc `$`*(id: TagId): string {.raises: [].} = - case id - of yTagQuestionMark: "?" - of yTagExclamationMark: "!" - of yTagString: "!!str" - of yTagSequence: "!!seq" - of yTagMapping: "!!map" - of yTagNull: "!!null" - of yTagBoolean: "!!bool" - of yTagInteger: "!!int" - of yTagFloat: "!!float" - of yTagOrderedMap: "!!omap" - of yTagPairs: "!!pairs" - of yTagSet: "!!set" - of yTagBinary: "!!binary" - of yTagMerge: "!!merge" - of yTagTimestamp: "!!timestamp" - of yTagValue: "!!value" - of yTagYaml: "!!yaml" - of yTagNimField: "!nim:field" - else: "<" & $int(id) & ">" - proc initTagLibrary*(): TagLibrary {.raises: [].} = ## initializes the ``tags`` table and sets ``nextCustomTagId`` to ## ``yFirstCustomTagId``. @@ -301,6 +209,12 @@ proc searchHandle*(tagLib: TagLibrary, tag: string): result.len = key.len result.handle = value +proc resolve*(tagLib: TagLibrary, handle: string): string {.raises: [].} = + ## try to resolve the given tag handle. + ## return the registered URI if the tag handle is found. + ## if the handle is unknown, return the empty string. + return tagLib.tagHandles.getOrDefault(handle, "") + iterator handles*(tagLib: TagLibrary): tuple[prefix, handle: string] = ## iterate over registered tag handles that may be used as shortcuts ## (e.g. ``!n!`` for ``tag:nimyaml.org,2016:``)