diff --git a/src/private/dumper.nim b/src/private/dumper.nim
new file mode 100644
index 0000000..5737201
--- /dev/null
+++ b/src/private/dumper.nim
@@ -0,0 +1,406 @@
+# State of the dumper inside the collection that is currently being written.
+# ``startItem`` advances it every time a child node is about to be emitted;
+# the ``...Start`` states mark a flow collection that has no items yet.
+type
+    DumperState = enum
+        dBlockExplicitMapKey, dBlockExplicitMapValue, dBlockImplicitMapKey,
+        dBlockImplicitMapValue, dBlockSequenceItem, dFlowImplicitMapKey,
+        dFlowImplicitMapValue, dFlowExplicitMapKey, dFlowExplicitMapValue,
+        dFlowSequenceItem, dFlowImplicitMapStart, dFlowExplicitMapStart,
+        dFlowSequenceStart
+
+proc needsEscaping(scalar: string): bool =
+    ## Returns ``true`` if ``scalar`` must be written double-quoted because
+    ## it contains a YAML indicator, a quote, a backslash, a tab or a line
+    ## break, or because it starts or ends with a space that a plain scalar
+    ## would lose.
+    const unsafeChars = {'{', '}', '[', ']', ',', '#', '-', ':', '?', '%',
+                         '&', '*', '!', '|', '>', '@', '`', '"', '\'',
+                         '\\', '\t', '\x0A', '\c'}
+    result = scalar.find(unsafeChars) != -1 or
+             (scalar.len > 0 and
+              (scalar[0] == ' ' or scalar[scalar.high] == ' '))
+
+proc writeDoubleQuoted(scalar: string, s: Stream) =
+    ## Writes ``scalar`` to ``s`` enclosed in double quotes. Quotes,
+    ## backslashes and control characters are replaced by escape sequences
+    ## that both YAML and JSON understand, so that line breaks and
+    ## backslashes in the content are not altered when it is read back.
+    s.write('"')
+    for c in scalar:
+        case c
+        of '"': s.write("\\\"")
+        of '\\': s.write("\\\\")
+        of '\x0A': s.write("\\n")
+        of '\c': s.write("\\r")
+        of '\t': s.write("\\t")
+        of '\x08': s.write("\\b")
+        of '\x0C': s.write("\\f")
+        of '\x00'..'\x07', '\x0B', '\x0E'..'\x1F':
+            s.write("\\u00" & toHex(ord(c), 2))
+        else: s.write(c)
+    s.write('"')
+
+proc startItem(target: Stream, style: YamlDumpStyle, indentation: int,
+               state: var DumperState) =
+    ## Writes whatever has to precede the next child of the enclosing
+    ## collection (line break and indentation, ``? ``, ``: ``, ``- `` or a
+    ## separating comma, depending on ``state`` and ``style``) and advances
+    ## ``state`` so that it describes the child that is written next.
+    case state
+    of dBlockExplicitMapValue:
+        target.write('\x0A')
+        target.write(repeat(' ', indentation))
+        target.write("? ")
+        state = dBlockExplicitMapKey
+    of dBlockExplicitMapKey:
+        target.write('\x0A')
+        target.write(repeat(' ', indentation))
+        target.write(": ")
+        state = dBlockExplicitMapValue
+    of dBlockImplicitMapValue:
+        target.write('\x0A')
+        target.write(repeat(' ', indentation))
+        state = dBlockImplicitMapKey
+    of dBlockImplicitMapKey:
+        target.write(": ")
+        state = dBlockImplicitMapValue
+    of dFlowExplicitMapKey:
+        target.write('\x0A')
+        target.write(repeat(' ', indentation))
+        target.write(": ")
+        state = dFlowExplicitMapValue
+    of dFlowExplicitMapValue:
+        target.write(",\x0A")
+        target.write(repeat(' ', indentation))
+        target.write("? ")
+        state = dFlowExplicitMapKey
+    of dFlowImplicitMapStart:
+        if style == yDumpJson:
+            target.write("\x0A")
+            target.write(repeat(' ', indentation))
+        state = dFlowImplicitMapKey
+    of dFlowExplicitMapStart:
+        target.write('\x0A')
+        target.write(repeat(' ', indentation))
+        target.write("? ")
+        state = dFlowExplicitMapKey
+    of dFlowImplicitMapKey:
+        target.write(": ")
+        state = dFlowImplicitMapValue
+    of dFlowImplicitMapValue:
+        if style == yDumpJson:
+            target.write(",\x0A")
+            target.write(repeat(' ', indentation))
+        else:
+            target.write(", ")
+        state = dFlowImplicitMapKey
+    of dBlockSequenceItem:
+        target.write('\x0A')
+        target.write(repeat(' ', indentation))
+        target.write("- ")
+    of dFlowSequenceStart:
+        case style
+        of yDumpMinimal, yDumpDefault:
+            discard
+        of yDumpCanonical, yDumpJson:
+            target.write('\x0A')
+            target.write(repeat(' ', indentation))
+        of yDumpBlockOnly:
+            discard # can never happen
+        state = dFlowSequenceItem
+    of dFlowSequenceItem:
+        case style
+        of yDumpMinimal, yDumpDefault:
+            target.write(", ")
+        of yDumpCanonical, yDumpJson:
+            target.write(",\x0A")
+            target.write(repeat(' ', indentation))
+        of yDumpBlockOnly:
+            discard # can never happen
+
+proc writeTagAndAnchor(target: Stream, tag: TagId, tagLib: YamlTagLibrary,
+                       anchor: AnchorId) =
+    ## Writes the node properties of the next node, each followed by a
+    ## space: the tag, unless it is one of the non-specific tags ``?`` and
+    ## ``!``, and the anchor, unless it is ``anchorNone``. Tags in the
+    ## ``tag:yaml.org,2002:`` namespace use the ``!!`` shorthand, all other
+    ## tags are written verbatim as ``!<uri>``.
+    if tag notin [tagQuestionMark, tagExclamationMark]:
+        let tagUri = tagLib.uri(tag)
+        if tagUri.startsWith("tag:yaml.org,2002:"):
+            target.write("!!")
+            target.write(tagUri[18..^1])
+            target.write(' ')
+        else:
+            target.write("!<")
+            target.write(tagUri)
+            target.write("> ")
+    if anchor != anchorNone:
+        target.write("&")
+        # TODO: properly select an anchor
+        # (the id is added to 'a', so ids above 25 leave the alphabet)
+        target.write(cast[byte]('a') + cast[byte](anchor))
+        target.write(' ')
+
+proc dump*(s: YamlStream, target: Stream, tagLib: YamlTagLibrary,
+           style: YamlDumpStyle = yDumpDefault, indentationStep: int = 2) =
+    ## Writes the events of ``s`` to ``target`` as a character stream.
+    ## ``tagLib`` supplies the URIs of the tags that are written, ``style``
+    ## selects the layout (flow, block, canonical or JSON-like) and
+    ## ``indentationStep`` is the number of spaces per nesting level.
+    ##
+    ## Raises ``ValueError`` if the sequence of events is malformed or if
+    ## it contains a ``yamlError`` event.
+    # Events consumed while reading ahead are queued in ``cached`` and
+    # replayed by ``cacheIterator`` before it pulls new events from ``s``.
+    var
+        cached = initQueue[YamlStreamEvent]()
+        cacheIterator = iterator(): YamlStreamEvent =
+            while true:
+                while cached.len > 0:
+                    yield cached.dequeue()
+                let item = s()
+                if finished(s):
+                    break
+                cached.enqueue(item)
+        indentation = 0
+        levels = newSeq[DumperState]()
+
+    for item in cacheIterator():
+        case item.kind
+        of yamlStartDocument:
+            # The end of a root-level block collection decrements
+            # ``indentation`` without a matching increment; start over.
+            indentation = 0
+            if style != yDumpJson:
+                # TODO: tag directives
+                target.write("%YAML 1.2\x0A--- ")
+        of yamlScalar:
+            if levels.len == 0:
+                if style != yDumpJson:
+                    target.write('\x0A')
+            else:
+                startItem(target, style, indentation, levels[levels.high])
+            if style != yDumpJson:
+                writeTagAndAnchor(target,
+                                  item.scalarTag, tagLib, item.scalarAnchor)
+
+            if item.scalarContent.needsEscaping or
+                    style in [yDumpCanonical, yDumpJson]:
+                writeDoubleQuoted(item.scalarContent, target)
+            else:
+                target.write(item.scalarContent)
+        of yamlAlias:
+            if levels.len == 0:
+                raise newException(ValueError, "Malformed YamlStream")
+            else:
+                startItem(target, style, indentation, levels[levels.high])
+            target.write('*')
+            target.write(cast[byte]('a') + cast[byte](item.aliasTarget))
+        of yamlStartSequence:
+            var nextState: DumperState
+            case style
+            of yDumpDefault:
+                # Read ahead: a sequence of only scalars and aliases with an
+                # estimated width of at most 60 is written in flow style.
+                var length = 0
+                while true:
+                    let next = s()
+                    if finished(s):
+                        raise newException(ValueError, "Malformed YamlStream")
+                    cached.enqueue(next)
+                    case next.kind
+                    of yamlScalar:
+                        length += 2 + next.scalarContent.len
+                    of yamlAlias:
+                        length += 6
+                    of yamlEndSequence:
+                        break
+                    else:
+                        length = int.high
+                        break
+                nextState = if length <= 60: dFlowSequenceStart else:
+                            dBlockSequenceItem
+            of yDumpMinimal, yDumpJson, yDumpCanonical:
+                nextState = dFlowSequenceStart
+            of yDumpBlockOnly:
+                nextState = dBlockSequenceItem
+
+            if levels.len == 0:
+                if nextState == dBlockSequenceItem:
+                    if style != yDumpJson:
+                        writeTagAndAnchor(target,
+                                          item.seqTag, tagLib, item.seqAnchor)
+                else:
+                    if style != yDumpJson:
+                        target.write('\x0A')
+                        writeTagAndAnchor(target,
+                                          item.seqTag, tagLib, item.seqAnchor)
+                    indentation += indentationStep
+            else:
+                # properties go behind the indicator written by startItem
+                startItem(target, style, indentation, levels[levels.high])
+                if style != yDumpJson:
+                    writeTagAndAnchor(target,
+                                      item.seqTag, tagLib, item.seqAnchor)
+                indentation += indentationStep
+
+            if nextState == dFlowSequenceStart:
+                target.write('[')
+            if levels.len > 0 and style in [yDumpJson, yDumpCanonical] and
+                    levels[levels.high] in
+                    [dBlockExplicitMapKey, dBlockExplicitMapValue,
+                     dBlockImplicitMapKey, dBlockImplicitMapValue,
+                     dBlockSequenceItem]:
+                indentation += indentationStep
+            levels.add(nextState)
+        of yamlStartMap:
+            var nextState: DumperState
+            case style
+            of yDumpDefault:
+                # Read ahead: a map of only scalars and aliases with an
+                # estimated width of at most 60 is written in flow style.
+                var length = 0
+                while true:
+                    let next = s()
+                    if finished(s):
+                        raise newException(ValueError, "Malformed YamlStream")
+                    cached.enqueue(next)
+                    case next.kind
+                    of yamlScalar:
+                        length += 2 + next.scalarContent.len
+                    of yamlAlias:
+                        length += 6
+                    of yamlEndMap:
+                        break
+                    else:
+                        length = int.high
+                        break
+                nextState = if length <= 60: dFlowImplicitMapStart else:
+                            if item.mapMayHaveKeyObjects:
+                                dBlockExplicitMapValue else: dBlockImplicitMapValue
+            of yDumpMinimal:
+                nextState = if item.mapMayHaveKeyObjects:
+                            dFlowExplicitMapStart else: dFlowImplicitMapStart
+            of yDumpCanonical:
+                nextState = dFlowExplicitMapStart
+            of yDumpJson:
+                nextState = dFlowImplicitMapStart
+            of yDumpBlockOnly:
+                nextState = if item.mapMayHaveKeyObjects:
+                            dBlockExplicitMapValue else: dBlockImplicitMapValue
+
+            if levels.len == 0:
+                if nextState in
+                        [dBlockExplicitMapValue, dBlockImplicitMapValue]:
+                    if style != yDumpJson:
+                        writeTagAndAnchor(target,
+                                          item.mapTag, tagLib, item.mapAnchor)
+                else:
+                    if style != yDumpJson:
+                        target.write('\x0A')
+                        writeTagAndAnchor(target,
+                                          item.mapTag, tagLib, item.mapAnchor)
+                    indentation += indentationStep
+            else:
+                # properties go behind the indicator written by startItem
+                startItem(target, style, indentation, levels[levels.high])
+                if style != yDumpJson:
+                    writeTagAndAnchor(target,
+                                      item.mapTag, tagLib, item.mapAnchor)
+                indentation += indentationStep
+
+            if nextState in [dFlowImplicitMapStart, dFlowExplicitMapStart]:
+                target.write('{')
+            if levels.len > 0 and style in [yDumpJson, yDumpCanonical] and
+                    levels[levels.high] in
+                    [dBlockExplicitMapKey, dBlockExplicitMapValue,
+                     dBlockImplicitMapKey, dBlockImplicitMapValue,
+                     dBlockSequenceItem]:
+                indentation += indentationStep
+            levels.add(nextState)
+
+        of yamlEndSequence:
+            if levels.len == 0:
+                raise newException(ValueError, "Malformed YamlStream")
+            case levels.pop()
+            of dFlowSequenceItem:
+                case style
+                of yDumpDefault, yDumpMinimal, yDumpBlockOnly:
+                    target.write(']')
+                of yDumpJson, yDumpCanonical:
+                    indentation -= indentationStep
+                    target.write('\x0A')
+                    target.write(repeat(' ', indentation))
+                    target.write(']')
+                    if levels.len == 0 or levels[levels.high] notin
+                            [dBlockExplicitMapKey, dBlockExplicitMapValue,
+                             dBlockImplicitMapKey, dBlockImplicitMapValue,
+                             dBlockSequenceItem]:
+                        continue
+            of dFlowSequenceStart:
+                if levels.len > 0 and style in [yDumpJson, yDumpCanonical] and
+                        levels[levels.high] in
+                        [dBlockExplicitMapKey, dBlockExplicitMapValue,
+                         dBlockImplicitMapKey, dBlockImplicitMapValue,
+                         dBlockSequenceItem]:
+                    indentation -= indentationStep
+                target.write(']')
+            of dBlockSequenceItem:
+                discard
+            else:
+                raise newException(ValueError, "Malformed YamlStream")
+            indentation -= indentationStep
+        of yamlEndMap:
+            if levels.len == 0:
+                raise newException(ValueError, "Malformed YamlStream")
+            case levels.pop()
+            of dFlowImplicitMapValue, dFlowExplicitMapValue:
+                case style
+                of yDumpDefault, yDumpMinimal, yDumpBlockOnly:
+                    target.write('}')
+                of yDumpJson, yDumpCanonical:
+                    indentation -= indentationStep
+                    target.write('\x0A')
+                    target.write(repeat(' ', indentation))
+                    target.write('}')
+                    if levels.len == 0 or levels[levels.high] notin
+                            [dBlockExplicitMapKey, dBlockExplicitMapValue,
+                             dBlockImplicitMapKey, dBlockImplicitMapValue,
+                             dBlockSequenceItem]:
+                        continue
+            of dFlowImplicitMapStart, dFlowExplicitMapStart:
+                if levels.len > 0 and style in [yDumpJson, yDumpCanonical] and
+                        levels[levels.high] in
+                        [dBlockExplicitMapKey, dBlockExplicitMapValue,
+                         dBlockImplicitMapKey, dBlockImplicitMapValue,
+                         dBlockSequenceItem]:
+                    indentation -= indentationStep
+                target.write('}')
+            of dBlockImplicitMapValue, dBlockExplicitMapValue:
+                discard
+            else:
+                raise newException(ValueError, "Malformed YamlStream")
+            indentation -= indentationStep
+        of yamlEndDocument:
+            let next = s()
+            if finished(s):
+                break
+            target.write("...\x0A")
+            cached.enqueue(next)
+        of yamlWarning:
+            discard
+        of yamlError:
+            raise newException(ValueError, "(" & $item.line & ", " &
+                               $item.column & "): " & item.description)
+
+proc transform*(input: Stream, output: Stream, style: YamlDumpStyle,
+                indentationStep: int = 2) =
+    ## Parses the character stream ``input`` and writes its events to
+    ## ``output`` in the given ``style``, using the extended tag library.
+    var
+        tagLib = extendedTagLibrary()
+        parser = newParser(tagLib)
+    dump(parser.parse(input), output, tagLib, style,
+         indentationStep)
\ No newline at end of file
diff --git a/src/private/json.nim b/src/private/json.nim
index fd58ee8..f62c5fd 100644
--- a/src/private/json.nim
+++ b/src/private/json.nim
@@ -63,12 +63,12 @@ proc parseToJson*(s: Stream): seq[JsonNode] =
             result.add(levels.pop().node)
         of yamlStartSequence:
             levels.add(initLevel(newJArray()))
-            if event.objAnchor != anchorNone:
-                anchors[event.objAnchor] = levels[levels.high].node
+            if event.seqAnchor != anchorNone:
+                anchors[event.seqAnchor] = levels[levels.high].node
         of yamlStartMap:
             levels.add(initLevel(newJObject()))
-            if event.objAnchor != anchorNone:
-                anchors[event.objAnchor] = levels[levels.high].node
+            if event.mapAnchor != anchorNone:
+                anchors[event.mapAnchor] = levels[levels.high].node
         of yamlScalar:
             if levels.len == 0:
                 # parser ensures that next event will be yamlEndDocument
diff --git a/src/private/sequential.nim b/src/private/sequential.nim
index 0960514..ba46626 100644
--- a/src/private/sequential.nim
+++ b/src/private/sequential.nim
@@ -42,9 +42,12 @@ proc `==`*(left: YamlStreamEvent, right: YamlStreamEvent): bool =
     case left.kind
     of yamlStartDocument, yamlEndDocument, yamlEndMap, yamlEndSequence:
         result = true
-    of yamlStartMap, yamlStartSequence:
-        result = left.objAnchor == right.objAnchor and
-                 left.objTag == right.objTag
+    of yamlStartMap:
+        result = left.mapAnchor == right.mapAnchor and
+                 left.mapTag == right.mapTag
+    of yamlStartSequence:
+        result = left.seqAnchor == right.seqAnchor and
+                 left.seqTag == right.seqTag
     of yamlScalar:
         result = left.scalarAnchor == right.scalarAnchor and
                  left.scalarTag == right.scalarTag and
@@ -109,11 +112,25 @@ template yieldScalar(content: string, typeHint: YamlTypeHint,
             scalarContent: content,
             scalarType: typeHint)
 
-template yieldStart(k: YamlStreamEventKind) {.dirty.} =
+template yieldStartMap() {.dirty.} =
     when defined(yamlDebug):
-        echo "Parser token [mode=", level.mode, ", state=", state, "]: ", k
-    yield YamlStreamEvent(kind: k, objAnchor: resolveAnchor(parser, anchor),
-                          objTag: resolveTag(parser, tag))
+        echo "Parser token [mode=", level.mode, ", state=", state, "]: yamlStartMap"
+    yield YamlStreamEvent(kind: yamlStartMap,
+                          mapAnchor: resolveAnchor(parser, anchor),
+                          mapTag: resolveTag(parser, tag))
+
+template yieldStartSequence() {.dirty.} =
+    when defined(yamlDebug):
+        echo "Parser token [mode=", level.mode, ", state=", state, "]: yamlStartSequence"
+    yield YamlStreamEvent(kind: yamlStartSequence,
+                          seqAnchor: resolveAnchor(parser, anchor),
+                          seqTag: resolveTag(parser, tag))
+
+template yieldStart(t: YamlStreamEventKind) {.dirty.} =
+    when t == yamlStartMap:
+        yieldStartMap()
+    else:
+        yieldStartSequence()
 
 template yieldDocumentEnd() {.dirty.} =
     yield YamlStreamEvent(kind: yamlEndDocument)
@@ -495,8 +512,8 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
                     if level.mode in [mUnknown, mScalar]:
                         # tags and anchors are for key scalar, not for map.
                         yield YamlStreamEvent(kind: yamlStartMap,
-                                              objAnchor: anchorNone,
-                                              objTag: tagQuestionMark)
+                                              mapAnchor: anchorNone,
+                                              mapTag: tagQuestionMark)
                     level.mode = mBlockMapValue
                     ancestry.add(level)
                     level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
@@ -533,8 +550,8 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream =
                     assert level.mode in [mUnknown, mImplicitBlockMapKey]
                     if level.mode == mUnknown:
                         yield YamlStreamEvent(kind: yamlStartMap,
-                                              objAnchor: anchorNone,
-                                              objTag: tagQuestionMark)
+                                              mapAnchor: anchorNone,
+                                              mapTag: tagQuestionMark)
                     level.mode = mBlockMapValue
                     ancestry.add(level)
                     level = DocumentLevel(mode: mUnknown, indicatorColumn: -1,
diff --git a/src/yaml.nim b/src/yaml.nim
index 2443b14..91f54cd 100644
--- a/src/yaml.nim
+++ b/src/yaml.nim
@@ -1,4 +1,4 @@
-import streams, unicode, lexbase, tables, strutils, json, hashes
+import streams, unicode, lexbase, tables, strutils, json, hashes, queues
 
 type
     YamlTypeHint* = enum
@@ -15,9 +15,13 @@ type
 
     YamlStreamEvent* = object
         case kind*: YamlStreamEventKind
-        of yamlStartMap, yamlStartSequence:
-            objAnchor* : AnchorId
-            objTag*    : TagId
+        of yamlStartMap:
+            mapAnchor* : AnchorId
+            mapTag*    : TagId
+            mapMayHaveKeyObjects* : bool
+        of yamlStartSequence:
+            seqAnchor* : AnchorId
+            seqTag*    : TagId
         of yamlScalar:
             scalarAnchor* : AnchorId
             scalarTag*    : TagId
@@ -41,33 +45,50 @@ type
     YamlSequentialParser* = ref object
         tagLib: YamlTagLibrary
         anchors: OrderedTable[string, AnchorId]
-    
+
+    YamlDumpStyle* = enum
+        yDumpMinimal, yDumpCanonical, yDumpDefault, yDumpJson, yDumpBlockOnly
 const
     # failsafe schema
 
-    tagExclamationMark*: TagId = 0.TagId # "!" non-specific tag
-    tagQuestionMark*   : TagId = 1.TagId # "?" non-specific tag
-    tagString*         : TagId = 2.TagId # !!str tag
-    tagSequence*       : TagId = 3.TagId # !!seq tag
-    tagMap*            : TagId = 4.TagId # !!map tag
+    tagExclamationMark*: TagId = 0.TagId ## ``!`` non-specific tag
+    tagQuestionMark*   : TagId = 1.TagId ## ``?`` non-specific tag
+    tagString*         : TagId = 2.TagId ## \
+        ## `!!str <http://yaml.org/type/str.html>`_ tag
+    tagSequence*       : TagId = 3.TagId ## \
+        ## `!!seq <http://yaml.org/type/seq.html>`_ tag
+    tagMap*            : TagId = 4.TagId ## \
+        ## `!!map <http://yaml.org/type/map.html>`_ tag
 
     # json & core schema
 
-    tagNull*           : TagId = 5.TagId # !!null tag
-    tagBoolean*        : TagId = 6.TagId # !!bool tag
-    tagInteger*        : TagId = 7.TagId # !!int tag
-    tagFloat*          : TagId = 8.TagId # !!float tag
+    tagNull*           : TagId = 5.TagId ## \
+        ## `!!null <http://yaml.org/type/null.html>`_ tag
+    tagBoolean*        : TagId = 6.TagId ## \
+        ## `!!bool <http://yaml.org/type/bool.html>`_ tag
+    tagInteger*        : TagId = 7.TagId ## \
+        ## `!!int <http://yaml.org/type/int.html>`_ tag
+    tagFloat*          : TagId = 8.TagId ## \
+        ## `!!float <http://yaml.org/type/float.html>`_ tag
 
     # other language-independent YAML types (from http://yaml.org/type/ )
 
-    tagOrderedMap*     : TagId = 9.TagId  # !!omap tag
-    tagPairs*          : TagId = 10.TagId # !!pairs tag
-    tagSet*            : TagId = 11.TagId # !!set tag
-    tagBinary*         : TagId = 12.TagId # !!binary tag
-    tagMerge*          : TagId = 13.TagId # !!merge tag
-    tagTimestamp*      : TagId = 14.TagId # !!timestamp tag
-    tagValue*          : TagId = 15.TagId # !!value tag
-    tagYaml*           : TagId = 16.TagId # !!yaml tag
+    tagOrderedMap*     : TagId = 9.TagId  ## \
+        ## `!!omap <http://yaml.org/type/omap.html>`_ tag
+    tagPairs*          : TagId = 10.TagId ## \
+        ## `!!pairs <http://yaml.org/type/pairs.html>`_ tag
+    tagSet*            : TagId = 11.TagId ## \
+        ## `!!set <http://yaml.org/type/set.html>`_ tag
+    tagBinary*         : TagId = 12.TagId ## \
+        ## `!!binary <http://yaml.org/type/binary.html>`_ tag
+    tagMerge*          : TagId = 13.TagId ## \
+        ## `!!merge <http://yaml.org/type/merge.html>`_ tag
+    tagTimestamp*      : TagId = 14.TagId ## \
+        ## `!!timestamp <http://yaml.org/type/timestamp.html>`_ tag
+    tagValue*          : TagId = 15.TagId ## \
+        ## `!!value <http://yaml.org/type/value.html>`_ tag
+    tagYaml*           : TagId = 16.TagId ## \
+        ## `!!yaml <http://yaml.org/type/yaml.html>`_ tag
 
     anchorNone*: AnchorId = (-1).AnchorId # no anchor defined
 
@@ -103,9 +124,16 @@ proc parse*(parser: YamlSequentialParser, s: Stream): YamlStream
 proc parseToJson*(s: Stream): seq[JsonNode]
 proc parseToJson*(s: string): seq[JsonNode]
 
+proc dump*(s: YamlStream, target: Stream, tagLib: YamlTagLibrary,
+           style: YamlDumpStyle = yDumpDefault, indentationStep: int = 2)
+
+proc transform*(input: Stream, output: Stream, style: YamlDumpStyle,
+                indentationStep: int = 2)
+
 # implementation
 
 include private.lexer
 include private.tagLibrary
 include private.sequential
 include private.json
+include private.dumper
\ No newline at end of file
diff --git a/test/parsing.nim b/test/parsing.nim
index c56fe4d..8c1064a 100644
--- a/test/parsing.nim
+++ b/test/parsing.nim
@@ -27,8 +27,8 @@ proc startSequence(tag: TagId = tagQuestionMark,
                    anchor: AnchorId = anchorNone):
         YamlStreamEvent =
     result.kind = yamlStartSequence
-    result.objAnchor = anchor
-    result.objTag = tag
+    result.seqAnchor = anchor
+    result.seqTag = tag
 
 proc endSequence(): YamlStreamEvent =
     result.kind = yamlEndSequence
@@ -36,8 +36,8 @@ proc endSequence(): YamlStreamEvent =
 proc startMap(tag: TagId = tagQuestionMark, anchor: AnchorId = anchorNone):
         YamlStreamEvent =
     result.kind = yamlStartMap
-    result.objAnchor = anchor
-    result.objTag = tag
+    result.mapAnchor = anchor
+    result.mapTag = tag
 
 proc endMap(): YamlStreamEvent =
     result.kind = yamlEndMap
@@ -83,12 +83,18 @@ proc printDifference(expected, actual: YamlStreamEvent) =
                  ", got ", actual.scalarType
         else:
             echo "[scalar] Unknown difference"
-    of yamlStartMap, yamlStartSequence:
-        if expected.objTag != actual.objTag:
-            echo "[object.tag] expected ", expected.objTag, ", got ",
-                 actual.objTag
+    of yamlStartMap:
+        if expected.mapTag != actual.mapTag:
+            echo "[map.tag] expected ", expected.mapTag, ", got ",
+                 actual.mapTag
         else:
-            echo "[object.tag] Unknown difference"
+            echo "[map.tag] Unknown difference"
+    of yamlStartSequence:
+        if expected.seqTag != actual.seqTag:
+            echo "[seq.tag] expected ", expected.seqTag, ", got ",
+                 actual.seqTag
+        else:
+            echo "[seq.tag] Unknown difference"
     of yamlAlias:
         if expected.aliasTarget != actual.aliasTarget:
             echo "[alias] expected ", expected.aliasTarget, ", got ",