From 3509706517f3562cbcbe9d94988eccdd80474ab8 Mon Sep 17 00:00:00 2001 From: Jordan Hrycaj Date: Thu, 5 May 2022 17:33:40 +0100 Subject: [PATCH] Lazy JSON parser (#42) * Proper error handling when parsed number exceeds uint64 details: Returns an "errNonPortableInt" error * need legacy flag for unit tests * lazy numeric token parser why: Numeric data may have a custom format. In particular, numeric data may be Uint256 which is not a JSON standard and might lead to an overflow. details: Numeric values are assigned a preliminary token type tkNumeric without being fully parsed. This can be used to insert a custom parser. Otherwise the value is parsed implicitly when querying/fetching the token type. + tok: replaced by getter tok() resolving lazy stuff (if necessary) + tokKind: current type without auto-resolving This lazy scheme could be extended to other custom types as long as the first token letter determines the custom type. * activate lazy parsing in reader howto: + no code change if a custom reader refers to an existing reader type FancyInt = distinct int proc readValue(reader: var JsonReader, value: var FancyInt) = value = reader.readValue(int).FancyInt + bespoke reader for custom parsing type FancyUint = distinct uint proc readValue(reader: var JsonReader, value: var FancyUint) = if reader.lexer.lazyTok == tkNumeric: var accu: FancyUint reader.lexer.customIntValueIt: accu = accu * 10 + it.u256 value = accu elif reader.lexer.tok == tkString: value = reader.lexer.strVal.parseUint.FancyUint ... reader.lexer.next + full code explanation at json_serialization/reader.readValue() * Add lazy parsing for customised string objects why: This allows parsing large or specialised strings without storing it in the lexer state descriptor. details: Similar logic applies as for the customised number parser. For almost all practical cases, a DSL template is available serving as wrapper around the character/byte item processor code.
* fix typo in unit test --- json_serialization.nimble | 1 + json_serialization/lexer.nim | 287 ++++++++++++++++++++++++----- json_serialization/reader.nim | 89 +++++++-- json_serialization/std/options.nim | 2 +- tests/test_serialization.nim | 139 ++++++++++++++ 5 files changed, 455 insertions(+), 63 deletions(-) diff --git a/json_serialization.nimble b/json_serialization.nimble index 8ac07dc..9e13b5b 100644 --- a/json_serialization.nimble +++ b/json_serialization.nimble @@ -16,6 +16,7 @@ proc test(args, path: string) = mkDir "build" exec "nim " & getEnv("TEST_LANG", "c") & " " & getEnv("NIMFLAGS") & " " & args & + " -d:nimOldCaseObjects " & " -r --hints:off --skipParentCfg --styleCheck:usages --styleCheck:error " & path task test, "Run all tests": diff --git a/json_serialization/lexer.nim b/json_serialization/lexer.nim index fdd827a..5efeade 100644 --- a/json_serialization/lexer.nim +++ b/json_serialization/lexer.nim @@ -6,7 +6,30 @@ import export inputs, types +{.push raises: [Defect].} + type + CustomIntHandler* = ##\ + ## Custom decimal integer parser, result values need to be captured + proc(dgt: int) {.gcsafe, raises: [Defect].} + + CustomByteAction* = enum + Continue ##\ + ## Default initialisation when provided to a `CustomBlobHandler` parser\ + ## function type via call-by-reference + + StopBeforeByte ##\ + ## Stop feeding and do not consume the current `byte` argument + + StopSwallowByte ##\ + ## Stop and discard current `byte` argument (e.g. the last double quote\ + ## '"' for a genuine string parser.) + + CustomBlobHandler* = ##\ + ## Custom text or binary parser, result values need to be captured. The\ + ## second argument `what` controlls the next action. 
+ proc(b: byte; what: var CustomByteAction) {.gcsafe, raises: [Defect].} + TokKind* = enum tkError, tkEof, @@ -22,7 +45,20 @@ type tkBracketLe, tkBracketRi, tkColon, - tkComma + tkComma, + + tkQuoted, ##\ + ## unfinished/lazy type, eventally becomes `tkString` + tkExBlob, ##\ + ## externally held string value after successful custom parsing + + tkNumeric, ##\ + ## unfinished/lazy type, any of `tkInt`, `tkNegativeInt`, `tkFloat` + tkExInt, ##\ + ## externally held non-negative integer value after successful custom\ + ## parsing + tkExNegInt + ## externally held negative integer value after successful custom parsing JsonErrorKind* = enum errNone = "no error", @@ -39,6 +75,8 @@ type errCommentExpected = "comment expected" errOrphanSurrogate = "unicode surrogates must be followed by another unicode character" errNonPortableInt = "number is outside the range of portable values" + errCustomIntExpexted = "not a customised integer" + errCustomBlobExpexted = "not a customised quoted blob" JsonLexer* = object stream*: InputStream @@ -48,7 +86,7 @@ type lineStartPos: int tokenStart: int - tok*: TokKind + tokKind: TokKind # formerly `tok`, now accessible by getter err*: JsonErrorKind absIntVal*: uint64 # BEWARE: negative integers will have tok == tkNegativeInt @@ -61,9 +99,23 @@ const 1e20, 1e21, 1e22] # TODO: this table should be much larger # The largest JSON number value is 1E308 -proc renderTok*(lexer: JsonLexer, output: var string) = - case lexer.tok - of tkError, tkEof: +# needed in renderTok() +proc scanNumber(lexer: var JsonLexer) {.gcsafe, raises: [Defect,IOError].} +proc scanString(lexer: var JsonLexer) {.gcsafe, raises: [Defect,IOError].} + +proc renderTok*(lexer: var JsonLexer, output: var string) + {.gcsafe, raises: [Defect,IOError].} = + # The lazy part + case lexer.tokKind + of tkNumeric: + lexer.scanNumber + of tkQuoted: + lexer.scanString + else: + discard + # The real stuff + case lexer.tokKind + of tkError, tkEof, tkNumeric, tkExInt, tkExNegInt, tkQuoted, 
tkExBlob: discard of tkString: output.add '"' @@ -101,14 +153,14 @@ template peek(s: InputStream): char = template read(s: InputStream): char = char inputs.read(s) -proc hexCharValue(c: char): int {.inline.} = +proc hexCharValue(c: char): int = case c of '0'..'9': ord(c) - ord('0') of 'a'..'f': ord(c) - ord('a') + 10 of 'A'..'F': ord(c) - ord('A') + 10 else: -1 -proc isDigit(c: char): bool {.inline.} = +proc isDigit(c: char): bool = return (c >= '0' and c <= '9') proc col*(lexer: JsonLexer): int = @@ -123,7 +175,7 @@ proc init*(T: type JsonLexer, stream: InputStream, mode = defaultJsonMode): T = line: 1, lineStartPos: 0, tokenStart: -1, - tok: tkError, + tokKind: tkError, err: errNone, absIntVal: uint64 0, floatVal: 0'f, @@ -131,7 +183,7 @@ proc init*(T: type JsonLexer, stream: InputStream, mode = defaultJsonMode): T = template error(error: JsonErrorKind) {.dirty.} = lexer.err = error - lexer.tok = tkError + lexer.tokKind = tkError return template checkForUnexpectedEof {.dirty.} = @@ -142,18 +194,19 @@ template requireNextChar(): char = checkForUnexpectedEof() lexer.stream.read() -template checkForNonPortableInt(val: uint64) = - if lexer.mode == Portable and val > uint64(maxPortableInt): +template checkForNonPortableInt(val: uint64; overflow: bool) = + if overflow or (lexer.mode == Portable and val > uint64(maxPortableInt)): error errNonPortableInt -proc scanHexRune(lexer: var JsonLexer): int = +proc scanHexRune(lexer: var JsonLexer): int + {.gcsafe, raises: [Defect,IOError].} = for i in 0..3: let hexValue = hexCharValue requireNextChar() if hexValue == -1: error errHexCharExpected result = (result shl 4) or hexValue proc scanString(lexer: var JsonLexer) = - lexer.tok = tkString + lexer.tokKind = tkString lexer.strVal.setLen 0 lexer.tokenStart = lexer.stream.pos @@ -185,13 +238,13 @@ proc scanString(lexer: var JsonLexer) = lexer.strVal.add '\x00' of 'u': var rune = lexer.scanHexRune() - if lexer.tok == tkError: return + if lexer.tokKind == tkError: return # Deal 
with surrogates if (rune and 0xfc00) == 0xd800: if requireNextChar() != '\\': error errOrphanSurrogate if requireNextChar() != 'u': error errOrphanSurrogate let nextRune = lexer.scanHexRune() - if lexer.tok == tkError: return + if lexer.tokKind == tkError: return if (nextRune and 0xfc00) == 0xdc00: rune = 0x10000 + (((rune - 0xd800) shl 10) or (nextRune - 0xdc00)) lexer.strVal.add toUTF8(Rune(rune)) @@ -203,12 +256,13 @@ proc scanString(lexer: var JsonLexer) = else: lexer.strVal.add c -proc handleLF(lexer: var JsonLexer) {.inline.} = +proc handleLF(lexer: var JsonLexer) = advance lexer.stream lexer.line += 1 lexer.lineStartPos = lexer.stream.pos -proc skipWhitespace(lexer: var JsonLexer) = +proc skipWhitespace(lexer: var JsonLexer) + {.gcsafe, raises: [Defect,IOError].} = template handleCR = # Beware: this is a template, because the return # statement has to exit `skipWhitespace`. @@ -275,7 +329,8 @@ template eatDigitAndPeek: char = if not lexer.stream.readable: return lexer.stream.peek() -proc scanSign(lexer: var JsonLexer): int = +proc scanSign(lexer: var JsonLexer): int + {.gcsafe, raises: [Defect,IOError].} = # Returns +1 or -1 # If a sign character is present, it must be followed # by more characters representing the number. If this @@ -290,16 +345,35 @@ proc scanSign(lexer: var JsonLexer): int = advance lexer.stream return 1 -proc scanInt(lexer: var JsonLexer): uint64 = +proc scanInt(lexer: var JsonLexer): (uint64,bool) + {.gcsafe, raises: [Defect,IOError].} = + ## Scan unsigned integer into uint64 if possible. + ## If all goes ok, the tuple `(parsed-value,false)` is returned. + ## On overflow, the tuple `(uint64.high,true)` is returned. 
var c = lexer.stream.peek() - result = uint64(ord(c) - ord('0')) - c = eatDigitAndPeek() + # Always possible to append `9` is result[0] is not larger + const canAppendDigit9 = (uint64.high - 9) div 10 + + result[0] = uint64(ord(c) - ord('0')) + + c = eatDigitAndPeek() # implicit auto-return while c.isDigit: - result = result * 10 + uint64(ord(c) - ord('0')) - c = eatDigitAndPeek() + # Process next digit unless overflow + if not result[1]: + let lsDgt = uint64(ord(c) - ord('0')) + if canAppendDigit9 < result[0] and + (uint64.high - lsDgt) div 10 < result[0]: + result[1] = true + result[0] = uint64.high + else: + result[0] = result[0] * 10 + lsDgt + # Fetch next digit + c = eatDigitAndPeek() # implicit auto-return -proc scanNumber(lexer: var JsonLexer) = + +proc scanNumber(lexer: var JsonLexer) + {.gcsafe, raises: [Defect,IOError].} = var sign = lexer.scanSign() if sign == 0: return var c = lexer.stream.peek() @@ -307,18 +381,18 @@ proc scanNumber(lexer: var JsonLexer) = if c == '.': advance lexer.stream requireMoreNumberChars: discard - lexer.tok = tkFloat + lexer.tokKind = tkFloat c = lexer.stream.peek() elif c.isDigit: - lexer.tok = if sign > 0: tkInt - else: tkNegativeInt - let scannedValue = lexer.scanInt() - checkForNonPortableInt scannedValue + lexer.tokKind = if sign > 0: tkInt + else: tkNegativeInt + let (scannedValue,overflow) = lexer.scanInt() + checkForNonPortableInt scannedValue, overflow lexer.absIntVal = scannedValue if not lexer.stream.readable: return c = lexer.stream.peek() if c == '.': - lexer.tok = tkFloat + lexer.tokKind = tkFloat lexer.floatVal = float(lexer.absIntVal) * float(sign) c = eatDigitAndPeek() else: @@ -338,7 +412,7 @@ proc scanNumber(lexer: var JsonLexer) = if not isDigit lexer.stream.peek(): error errNumberExpected - let exponent = lexer.scanInt() + let (exponent,_) = lexer.scanInt() if exponent >= uint64(len(powersOfTen)): error errExponentTooLarge @@ -351,47 +425,170 @@ proc scanIdentifier(lexer: var JsonLexer, expectedIdent: 
string, expectedTok: TokKind) = for c in expectedIdent: if c != lexer.stream.read(): - lexer.tok = tkError + lexer.tokKind = tkError return - lexer.tok = expectedTok + lexer.tokKind = expectedTok -proc next*(lexer: var JsonLexer) = +proc accept*(lexer: var JsonLexer) + {.gcsafe, raises: [Defect,IOError].} = + ## Finalise token by parsing the value. Note that this might change + ## the token type + case lexer.tokKind + of tkNumeric: + lexer.scanNumber + of tkQuoted: + lexer.scanString + else: + discard + +proc next*(lexer: var JsonLexer) + {.gcsafe, raises: [Defect,IOError].} = lexer.skipWhitespace() if not lexer.stream.readable: - lexer.tok = tkEof + lexer.tokKind = tkEof return + # in case the value parsing was missing + lexer.accept() + lexer.strVal.setLen 0 # release memory (if any) + let c = lexer.stream.peek() case c of '+', '-', '.', '0'..'9': - lexer.scanNumber() + lexer.tokKind = tkNumeric of '"': - lexer.scanString() + lexer.tokKind = tkQuoted of '[': advance lexer.stream - lexer.tok = tkBracketLe + lexer.tokKind = tkBracketLe of '{': advance lexer.stream - lexer.tok = tkCurlyLe + lexer.tokKind = tkCurlyLe of ']': advance lexer.stream - lexer.tok = tkBracketRi + lexer.tokKind = tkBracketRi of '}': advance lexer.stream - lexer.tok = tkCurlyRi + lexer.tokKind = tkCurlyRi of ',': advance lexer.stream - lexer.tok = tkComma + lexer.tokKind = tkComma of ':': advance lexer.stream - lexer.tok = tkColon + lexer.tokKind = tkColon of '\0': - lexer.tok = tkEof + lexer.tokKind = tkEof of 'n': lexer.scanIdentifier("null", tkNull) of 't': lexer.scanIdentifier("true", tkTrue) of 'f': lexer.scanIdentifier("false", tkFalse) else: advance lexer.stream - lexer.tok = tkError + lexer.tokKind = tkError +proc tok*(lexer: var JsonLexer): TokKind + {.gcsafe, raises: [Defect,IOError].} = + ## Getter, implies full token parsing + lexer.accept + lexer.tokKind + +proc lazyTok*(lexer: JsonLexer): TokKind = + ## Preliminary token state unless accepted, already + lexer.tokKind + + +proc 
customIntHandler*(lexer: var JsonLexer; handler: CustomIntHandler) + {.gcsafe, raises: [Defect,IOError].} = + ## Apply the `handler` argument function for parsing a `tkNumeric` type + ## value. This function sets the token state to `tkExInt`, `tkExNegInt`, + ## or `tkError`. + proc customScan(lexer: var JsonLexer) + {.gcsafe, raises: [Defect,IOError].} = + var c = lexer.stream.peek() + handler(ord(c) - ord('0')) + c = eatDigitAndPeek() # implicit auto-return + while c.isDigit: + handler(ord(c) - ord('0')) + c = eatDigitAndPeek() # implicit auto-return + + if lexer.tokKind == tkNumeric: + var sign = lexer.scanSign() + if sign != 0: + if lexer.stream.peek.isDigit: + lexer.tokKind = if 0 < sign: tkExInt else: tkExNegInt + lexer.customScan + if not lexer.stream.readable or lexer.stream.peek != '.': + return + + error errCustomIntExpexted + +proc customBlobHandler*(lexer: var JsonLexer; handler: CustomBlobHandler) + {.gcsafe, raises: [Defect,IOError].} = + ## Apply the `handler` argument function for parsing a `tkQuoted` type + ## value. This function sets the token state to `tkExBlob`, or `tkError`. + proc customScan(lexer: var JsonLexer) + {.gcsafe, raises: [Defect,IOError].} = + var what = Continue + while lexer.stream.readable: + var c = lexer.stream.peek + handler(c.byte, what) + case what + of StopBeforeByte: + break + of StopSwallowByte: + advance lexer.stream + break + of Continue: + advance lexer.stream + + if lexer.tokKind == tkQuoted: + advance lexer.stream + lexer.tokKind = tkExBlob + lexer.customScan + return + + error errCustomBlobExpexted + + +template customIntValueIt*(lexer: var JsonLexer; body: untyped): untyped = + ## Convenience wrapper around `customIntHandler()` for parsing integers. + ## + ## The `body` argument represents a virtual function body. So the current + ## digit processing can be exited with `return`. 
+ var handler: CustomIntHandler = + proc(digit: int) = + let it {.inject.} = digit + body + lexer.customIntHandler(handler) + +template customBlobValueIt*(lexer: var JsonLexer; body: untyped): untyped = + ## Convenience wrapper around `customBlobHandler()` for parsing any byte + ## object. The body function needs to terminate explicitely with the typical + ## phrase `doNext = StopSwallowByte` or with the more unusual phrase + ## `doNext = StopBeforeByte`. + ## + ## The `body` argument represents a virtual function body. So the current + ## byte processing can be exited with `return`. + var handler: CustomBlobHandler = + proc(c: byte; what: var CustomByteAction) = + let it {.inject.} = c + var doNext {.inject.} = what + body + what = doNext + lexer.customBlobHandler(handler) + +template customTextValueIt*(lexer: var JsonLexer; body: untyped): untyped = + ## Convenience wrapper around `customBlobHandler()` for parsing a text + ## terminating with a double quote character '"' (no inner double quote + ## allowed.) + ## + ## The `body` argument represents a virtual function body. So the current + ## character processing can be exited with `return`. 
+ var handler: CustomBlobHandler = + proc(c: byte; what: var CustomByteAction) = + let it {.inject.} = c.chr + if it == '"': + what = StopSwallowByte + else: + body + lexer.customBlobHandler(handler) diff --git a/json_serialization/reader.nim b/json_serialization/reader.nim index c3aea44..1038292 100644 --- a/json_serialization/reader.nim +++ b/json_serialization/reader.nim @@ -101,7 +101,7 @@ proc raiseUnexpectedToken*(r: JsonReader, expected: ExpectedTokenCategory) {.noreturn.} = var ex = new UnexpectedTokenError ex.assignLineNumber(r) - ex.encountedToken = r.lexer.tok + ex.encountedToken = r.lexer.lazyTok ex.expectedToken = expected raise ex @@ -155,7 +155,7 @@ proc init*(T: type JsonReader, proc setParsed[T: enum](e: var T, s: string) = e = parseEnum[T](s) -proc requireToken*(r: JsonReader, tk: TokKind) = +proc requireToken*(r: var JsonReader, tk: TokKind) = if r.lexer.tok != tk: r.raiseUnexpectedToken case tk of tkString: etString @@ -262,6 +262,10 @@ proc parseJsonNode(r: var JsonReader): JsonNode = result = JsonNode(kind: JNull) r.lexer.next() + of tkQuoted, tkExBlob, tkNumeric, tkExInt, tkExNegInt: + raiseAssert "generic type " & $r.lexer.lazyTok & " is not applicable" + + proc skipSingleJsValue(r: var JsonReader) = case r.lexer.tok of tkCurlyLe: @@ -292,7 +296,9 @@ proc skipSingleJsValue(r: var JsonReader) = of tkColon, tkComma, tkEof, tkError, tkBracketRi, tkCurlyRi: r.raiseUnexpectedToken etValue - of tkString, tkInt, tkNegativeInt, tkFloat, tkTrue, tkFalse, tkNull: + of tkString, tkQuoted, tkExBlob, + tkInt, tkNegativeInt, tkFloat, tkNumeric, tkExInt, tkExNegInt, + tkTrue, tkFalse, tkNull: r.lexer.next() proc captureSingleJsValue(r: var JsonReader, output: var string) = @@ -335,7 +341,9 @@ proc captureSingleJsValue(r: var JsonReader, output: var string) = of tkColon, tkComma, tkEof, tkError, tkBracketRi, tkCurlyRi: r.raiseUnexpectedToken etValue - of tkString, tkInt, tkNegativeInt, tkFloat, tkTrue, tkFalse, tkNull: + of tkString, tkQuoted, tkExBlob, + 
tkInt, tkNegativeInt, tkFloat, tkNumeric, tkExInt, tkExNegInt, + tkTrue, tkFalse, tkNull: r.lexer.next() proc allocPtr[T](p: var ptr T) = @@ -348,7 +356,7 @@ iterator readArray*(r: var JsonReader, ElemType: typedesc): ElemType = mixin readValue r.skipToken tkBracketLe - if r.lexer.tok != tkBracketRi: + if r.lexer.lazyTok != tkBracketRi: while true: var res: ElemType readValue(r, res) @@ -362,14 +370,14 @@ iterator readObjectFields*(r: var JsonReader, mixin readValue r.skipToken tkCurlyLe - if r.lexer.tok != tkCurlyRi: + if r.lexer.lazyTok != tkCurlyRi: while true: var key: KeyType readValue(r, key) - if r.lexer.tok != tkColon: break + if r.lexer.lazyTok != tkColon: break r.lexer.next() yield key - if r.lexer.tok != tkComma: break + if r.lexer.lazyTok != tkComma: break r.lexer.next() r.skipToken tkCurlyRi @@ -394,10 +402,47 @@ template isCharArray(v: auto): bool = false proc readValue*[T](r: var JsonReader, value: var T) {.raises: [SerializationError, IOError, Defect].} = + ## Master filed/object parser. This function relies on customised sub-mixins for particular + ## object types. + ## + ## Customised readValue() examples: + ## :: + ## type + ## FancyInt = distinct int + ## FancyUInt = distinct uint + ## + ## proc readValue(reader: var JsonReader, value: var FancyInt) = + ## ## Refer to another readValue() instance + ## value = reader.readValue(int).FancyInt + ## + ## proc readValue(reader: var JsonReader, value: var FancyUInt) = + ## ## Provide a full custum version of a readValue() instance + ## if reader.lexer.lazyTok == tkNumeric: + ## # lazyTok: Check token before the value is available + ## var accu: FancyUInt + ## # custom parser (the directive `customIntValueIt()` is a + ## # convenience wrapper around `customIntHandler()`.) 
+ ## reader.lexer.customIntValueIt: + ## accu = accu * 10 + it.u256 + ## value = accu + ## elif reader.lexer.lazyTok == tkQuoted: + ## var accu = string + ## # The following is really for demo only (inefficient, + ## # lacks hex encoding) + ## reader.lexer.customTextValueIt: + ## accu &= it + ## value = accu.parseUInt.FancyUInt + ## ... + ## # prepare next parser cycle + ## reader.lexer.next + ## mixin readValue type ReaderType {.used.} = type r - let tok {.used.} = r.lexer.tok + when value is (object or tuple): + let tok {.used.} = r.lexer.lazyTok + else: + let tok {.used.} = r.lexer.tok # resove lazy token when value is JsonString: r.captureSingleJsValue(string value) @@ -527,23 +572,32 @@ proc readValue*[T](r: var JsonReader, value: var T) when expectedFields > 0: let fields = T.fieldReadersTable(ReaderType) var expectedFieldPos = 0 - while r.lexer.tok == tkString: + while true: + # Have the assignment parsed of the AVP + if r.lexer.lazyTok == tkQuoted: + r.lexer.accept + if r.lexer.lazyTok != tkString: + break + # Calculate/assemble handler when T is tuple: var reader = fields[][expectedFieldPos].reader expectedFieldPos += 1 else: var reader = findFieldReader(fields[], r.lexer.strVal, expectedFieldPos) - r.lexer.next() - r.skipToken tkColon if reader != nil: + r.lexer.next() + r.skipToken tkColon reader(value, r) inc readFields - elif r.allowUnknownFields: - r.skipSingleJsValue() else: - const typeName = typetraits.name(T) - r.raiseUnexpectedField(r.lexer.strVal, typeName) - if r.lexer.tok == tkComma: + r.lexer.next() + r.skipToken tkColon + if r.allowUnknownFields: + r.skipSingleJsValue() + else: + const typeName = typetraits.name(T) + r.raiseUnexpectedField(r.lexer.strVal, typeName) + if r.lexer.lazyTok == tkComma: r.lexer.next() else: break @@ -552,6 +606,7 @@ proc readValue*[T](r: var JsonReader, value: var T) const typeName = typetraits.name(T) r.raiseIncompleteObject(typeName) + r.lexer.accept r.skipToken tkCurlyRi else: diff --git 
a/json_serialization/std/options.nim b/json_serialization/std/options.nim index be2aef0..39be41a 100644 --- a/json_serialization/std/options.nim +++ b/json_serialization/std/options.nim @@ -8,7 +8,7 @@ proc writeValue*(writer: var JsonWriter, value: Option) = writer.writeValue JsonString("null") proc readValue*[T](reader: var JsonReader, value: var Option[T]) = - let tok = reader.lexer.tok + let tok = reader.lexer.lazyTok if tok == tkNull: reset value reader.lexer.next() diff --git a/tests/test_serialization.nim b/tests/test_serialization.nim index 15ed7a3..ddf0692 100644 --- a/tests/test_serialization.nim +++ b/tests/test_serialization.nim @@ -3,6 +3,7 @@ import serialization/object_serialization, serialization/testing/generic_suite, ../json_serialization, ./utils, + ../json_serialization/lexer, ../json_serialization/std/[options, sets, tables] type @@ -50,6 +51,80 @@ type notNilStr: cstring nilStr: cstring + # Customised parser tests + FancyInt = distinct int + FancyUInt = distinct uint + FancyText = distinct string + + HasFancyInt = object + name: string + data: FancyInt + + HasFancyUInt = object + name: string + data: FancyUInt + + HasFancyText = object + name: string + data: FancyText + + TokenRegistry = tuple + entry, exit: TokKind + dup: bool + +var + customVisit: TokenRegistry + +template registerVisit(reader: var JsonReader; body: untyped): untyped = + if customVisit.entry == tkError: + customVisit.entry = reader.lexer.lazyTok + body + customVisit.exit = reader.lexer.lazyTok + else: + customVisit.dup = true + +# Customised parser referring to other parser +proc readValue(reader: var JsonReader, value: var FancyInt) = + reader.registerVisit: + value = reader.readValue(int).FancyInt + +# Customised numeric parser for integer and stringified integer +proc readValue(reader: var JsonReader, value: var FancyUInt) = + reader.registerVisit: + var accu = 0u + case reader.lexer.lazyTok + of tkNumeric: + reader.lexer.customIntValueIt: + accu = accu * 10u + it.uint + 
of tkQuoted: + var s = "" + reader.lexer.customTextValueIt: + s &= it + accu = s.parseUInt + else: + discard + value = accu.FancyUInt + reader.lexer.next + +# Customised numeric parser for text, accepts embedded quote +proc readValue(reader: var JsonReader, value: var FancyText) = + reader.registerVisit: + var (s, esc) = ("",false) + reader.lexer.customBlobValueIt: + let c = it.chr + if esc: + s &= c + esc = false + elif c == '\\': + esc = true + elif c != '"': + s &= c + else: + doNext = StopSwallowByte + value = s.FancyText + reader.lexer.next + + # TODO `borrowSerialization` still doesn't work # properly when it's placed in another module: Meter.borrowSerialization int @@ -371,3 +446,67 @@ suite "toJson tests": # clarity regarding the memory allocation approach Json.decode("null", cstring) +suite "Custom parser tests": + test "Fall back to int parser": + customVisit = TokenRegistry.default + + let + jData = test_dedent""" + { + "name": "FancyInt", + "data": -12345 + } + """ + dData = Json.decode(jData, HasFancyInt) + + check dData.name == "FancyInt" + check dData.data.int == -12345 + check customVisit == (tkNumeric, tkCurlyRi, false) + + test "Uint parser on negative integer": + customVisit = TokenRegistry.default + + let + jData = test_dedent""" + { + "name": "FancyUInt", + "data": -12345 + } + """ + dData = Json.decode(jData, HasFancyUInt) + + check dData.name == "FancyUInt" + check dData.data.uint == 12345u # abs value + check customVisit == (tkNumeric, tkExNegInt, false) + + test "Uint parser on string integer": + customVisit = TokenRegistry.default + + let + jData = test_dedent""" + { + "name": "FancyUInt", + "data": "12345" + } + """ + dData = Json.decode(jData, HasFancyUInt) + + check dData.name == "FancyUInt" + check dData.data.uint == 12345u + check customVisit == (tkQuoted, tkExBlob, false) + + test "Parser on text blob with embedded quote (backlash escape support)": + customVisit = TokenRegistry.default + + let + jData = test_dedent""" + { + "name": 
"FancyText", + "data": "a\bc\"\\def" + } + """ + dData = Json.decode(jData, HasFancyText) + + check dData.name == "FancyText" + check dData.data.string == "abc\"\\def" + check customVisit == (tkQuoted, tkExBlob, false)