Lazy JSON parser (#42)

* Proper error handling when parsed number exceeds uint64

details:
  Returns an "errNonPortableInt" error

* need legacy flag for unit tests

* lazy numeric token parser

why:
  Numeric data may have a custom format. In particular, numeric data may
  be Uint256, which is not a JSON standard and might lead to an overflow.

details:
  Numeric values are assigned a preliminary token type tkNumeric without
  being fully parsed. This can be used to insert a custom parser.
  Otherwise the value is parsed implicitly when querying/fetching the
  token type.

  + tok:     replaced by getter tok() resolving lazy stuff (if necessary)
  + tokKind: current type without auto-resolving

  This lazy scheme could be extended to other custom types as long as
  the first token letter determines the custom type.

* activate lazy parsing in reader

howto:
  + no code change if a custom reader refers to an existing reader
    type FancyInt = distinct int
    proc readValue(reader: var JsonReader, value: var FancyInt) =
      value = reader.readValue(int).FancyInt

  + bespoke reader for custom parsing
    type FancyUint = distinct uint
    proc readValue(reader: var JsonReader, value: var FancyUint) =
      if reader.lexer.lazyTok == tkNumeric:
        var accu: FancyUint
        reader.lexer.customIntValueIt:
          accu = accu * 10 + it.u256
        value = accu
      elif reader.lexer.tok == tkString:
        value = reader.lexer.strVal.parseUint.FancyUint
        ...
      reader.lexer.next

  + full code explanation at json_serialisation/reader.readValue()

* Add lazy parsing for customised string objects

why:
  This allows parsing large or specialised strings without storing them
  in the lexer state descriptor.

details:
  Similar logic applies as for the customised number parser. For almost
  all practical cases, a DSL template is available serving as wrapper
  around the character/byte item processor code.

* fix typo in unit test
This commit is contained in:
Jordan Hrycaj 2022-05-05 17:33:40 +01:00 committed by GitHub
parent 074cd026e6
commit 3509706517
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 455 additions and 63 deletions

View File

@ -16,6 +16,7 @@ proc test(args, path: string) =
mkDir "build"
exec "nim " & getEnv("TEST_LANG", "c") & " " & getEnv("NIMFLAGS") & " " & args &
" -d:nimOldCaseObjects " &
" -r --hints:off --skipParentCfg --styleCheck:usages --styleCheck:error " & path
task test, "Run all tests":

View File

@ -6,7 +6,30 @@ import
export
inputs, types
{.push raises: [Defect].}
type
CustomIntHandler* = ##\
## Custom decimal integer parser, result values need to be captured
proc(dgt: int) {.gcsafe, raises: [Defect].}
CustomByteAction* = enum
Continue ##\
## Default initialisation when provided to a `CustomBlobHandler` parser\
## function type via call-by-reference
StopBeforeByte ##\
## Stop feeding and do not consume the current `byte` argument
StopSwallowByte ##\
## Stop and discard current `byte` argument (e.g. the last double quote\
## '"' for a genuine string parser.)
CustomBlobHandler* = ##\
## Custom text or binary parser, result values need to be captured. The\
## second argument `what` controls the next action.
proc(b: byte; what: var CustomByteAction) {.gcsafe, raises: [Defect].}
TokKind* = enum
tkError,
tkEof,
@ -22,7 +45,20 @@ type
tkBracketLe,
tkBracketRi,
tkColon,
tkComma
tkComma,
tkQuoted, ##\
## unfinished/lazy type, eventually becomes `tkString`
tkExBlob, ##\
## externally held string value after successful custom parsing
tkNumeric, ##\
## unfinished/lazy type, any of `tkInt`, `tkNegativeInt`, `tkFloat`
tkExInt, ##\
## externally held non-negative integer value after successful custom\
## parsing
tkExNegInt
## externally held negative integer value after successful custom parsing
JsonErrorKind* = enum
errNone = "no error",
@ -39,6 +75,8 @@ type
errCommentExpected = "comment expected"
errOrphanSurrogate = "unicode surrogates must be followed by another unicode character"
errNonPortableInt = "number is outside the range of portable values"
errCustomIntExpexted = "not a customised integer"
errCustomBlobExpexted = "not a customised quoted blob"
JsonLexer* = object
stream*: InputStream
@ -48,7 +86,7 @@ type
lineStartPos: int
tokenStart: int
tok*: TokKind
tokKind: TokKind # formerly `tok`, now accessible by getter
err*: JsonErrorKind
absIntVal*: uint64 # BEWARE: negative integers will have tok == tkNegativeInt
@ -61,9 +99,23 @@ const
1e20, 1e21, 1e22] # TODO: this table should be much larger
# The largest JSON number value is 1E308
proc renderTok*(lexer: JsonLexer, output: var string) =
case lexer.tok
of tkError, tkEof:
# needed in renderTok()
proc scanNumber(lexer: var JsonLexer) {.gcsafe, raises: [Defect,IOError].}
proc scanString(lexer: var JsonLexer) {.gcsafe, raises: [Defect,IOError].}
proc renderTok*(lexer: var JsonLexer, output: var string)
{.gcsafe, raises: [Defect,IOError].} =
# The lazy part
case lexer.tokKind
of tkNumeric:
lexer.scanNumber
of tkQuoted:
lexer.scanString
else:
discard
# The real stuff
case lexer.tokKind
of tkError, tkEof, tkNumeric, tkExInt, tkExNegInt, tkQuoted, tkExBlob:
discard
of tkString:
output.add '"'
@ -101,14 +153,14 @@ template peek(s: InputStream): char =
template read(s: InputStream): char =
char inputs.read(s)
proc hexCharValue(c: char): int {.inline.} =
proc hexCharValue(c: char): int =
case c
of '0'..'9': ord(c) - ord('0')
of 'a'..'f': ord(c) - ord('a') + 10
of 'A'..'F': ord(c) - ord('A') + 10
else: -1
proc isDigit(c: char): bool {.inline.} =
proc isDigit(c: char): bool =
return (c >= '0' and c <= '9')
proc col*(lexer: JsonLexer): int =
@ -123,7 +175,7 @@ proc init*(T: type JsonLexer, stream: InputStream, mode = defaultJsonMode): T =
line: 1,
lineStartPos: 0,
tokenStart: -1,
tok: tkError,
tokKind: tkError,
err: errNone,
absIntVal: uint64 0,
floatVal: 0'f,
@ -131,7 +183,7 @@ proc init*(T: type JsonLexer, stream: InputStream, mode = defaultJsonMode): T =
template error(error: JsonErrorKind) {.dirty.} =
lexer.err = error
lexer.tok = tkError
lexer.tokKind = tkError
return
template checkForUnexpectedEof {.dirty.} =
@ -142,18 +194,19 @@ template requireNextChar(): char =
checkForUnexpectedEof()
lexer.stream.read()
template checkForNonPortableInt(val: uint64) =
if lexer.mode == Portable and val > uint64(maxPortableInt):
template checkForNonPortableInt(val: uint64; overflow: bool) =
if overflow or (lexer.mode == Portable and val > uint64(maxPortableInt)):
error errNonPortableInt
proc scanHexRune(lexer: var JsonLexer): int =
proc scanHexRune(lexer: var JsonLexer): int
{.gcsafe, raises: [Defect,IOError].} =
for i in 0..3:
let hexValue = hexCharValue requireNextChar()
if hexValue == -1: error errHexCharExpected
result = (result shl 4) or hexValue
proc scanString(lexer: var JsonLexer) =
lexer.tok = tkString
lexer.tokKind = tkString
lexer.strVal.setLen 0
lexer.tokenStart = lexer.stream.pos
@ -185,13 +238,13 @@ proc scanString(lexer: var JsonLexer) =
lexer.strVal.add '\x00'
of 'u':
var rune = lexer.scanHexRune()
if lexer.tok == tkError: return
if lexer.tokKind == tkError: return
# Deal with surrogates
if (rune and 0xfc00) == 0xd800:
if requireNextChar() != '\\': error errOrphanSurrogate
if requireNextChar() != 'u': error errOrphanSurrogate
let nextRune = lexer.scanHexRune()
if lexer.tok == tkError: return
if lexer.tokKind == tkError: return
if (nextRune and 0xfc00) == 0xdc00:
rune = 0x10000 + (((rune - 0xd800) shl 10) or (nextRune - 0xdc00))
lexer.strVal.add toUTF8(Rune(rune))
@ -203,12 +256,13 @@ proc scanString(lexer: var JsonLexer) =
else:
lexer.strVal.add c
proc handleLF(lexer: var JsonLexer) {.inline.} =
proc handleLF(lexer: var JsonLexer) =
advance lexer.stream
lexer.line += 1
lexer.lineStartPos = lexer.stream.pos
proc skipWhitespace(lexer: var JsonLexer) =
proc skipWhitespace(lexer: var JsonLexer)
{.gcsafe, raises: [Defect,IOError].} =
template handleCR =
# Beware: this is a template, because the return
# statement has to exit `skipWhitespace`.
@ -275,7 +329,8 @@ template eatDigitAndPeek: char =
if not lexer.stream.readable: return
lexer.stream.peek()
proc scanSign(lexer: var JsonLexer): int =
proc scanSign(lexer: var JsonLexer): int
{.gcsafe, raises: [Defect,IOError].} =
# Returns +1 or -1
# If a sign character is present, it must be followed
# by more characters representing the number. If this
@ -290,16 +345,35 @@ proc scanSign(lexer: var JsonLexer): int =
advance lexer.stream
return 1
proc scanInt(lexer: var JsonLexer): uint64 =
proc scanInt(lexer: var JsonLexer): (uint64,bool)
{.gcsafe, raises: [Defect,IOError].} =
## Scan unsigned integer into uint64 if possible.
## If all goes ok, the tuple `(parsed-value,false)` is returned.
## On overflow, the tuple `(uint64.high,true)` is returned.
var c = lexer.stream.peek()
result = uint64(ord(c) - ord('0'))
c = eatDigitAndPeek()
# Always possible to append digit `9` if result[0] is not larger
const canAppendDigit9 = (uint64.high - 9) div 10
result[0] = uint64(ord(c) - ord('0'))
c = eatDigitAndPeek() # implicit auto-return
while c.isDigit:
result = result * 10 + uint64(ord(c) - ord('0'))
c = eatDigitAndPeek()
# Process next digit unless overflow
if not result[1]:
let lsDgt = uint64(ord(c) - ord('0'))
if canAppendDigit9 < result[0] and
(uint64.high - lsDgt) div 10 < result[0]:
result[1] = true
result[0] = uint64.high
else:
result[0] = result[0] * 10 + lsDgt
# Fetch next digit
c = eatDigitAndPeek() # implicit auto-return
proc scanNumber(lexer: var JsonLexer) =
proc scanNumber(lexer: var JsonLexer)
{.gcsafe, raises: [Defect,IOError].} =
var sign = lexer.scanSign()
if sign == 0: return
var c = lexer.stream.peek()
@ -307,18 +381,18 @@ proc scanNumber(lexer: var JsonLexer) =
if c == '.':
advance lexer.stream
requireMoreNumberChars: discard
lexer.tok = tkFloat
lexer.tokKind = tkFloat
c = lexer.stream.peek()
elif c.isDigit:
lexer.tok = if sign > 0: tkInt
else: tkNegativeInt
let scannedValue = lexer.scanInt()
checkForNonPortableInt scannedValue
lexer.tokKind = if sign > 0: tkInt
else: tkNegativeInt
let (scannedValue,overflow) = lexer.scanInt()
checkForNonPortableInt scannedValue, overflow
lexer.absIntVal = scannedValue
if not lexer.stream.readable: return
c = lexer.stream.peek()
if c == '.':
lexer.tok = tkFloat
lexer.tokKind = tkFloat
lexer.floatVal = float(lexer.absIntVal) * float(sign)
c = eatDigitAndPeek()
else:
@ -338,7 +412,7 @@ proc scanNumber(lexer: var JsonLexer) =
if not isDigit lexer.stream.peek():
error errNumberExpected
let exponent = lexer.scanInt()
let (exponent,_) = lexer.scanInt()
if exponent >= uint64(len(powersOfTen)):
error errExponentTooLarge
@ -351,47 +425,170 @@ proc scanIdentifier(lexer: var JsonLexer,
expectedIdent: string, expectedTok: TokKind) =
for c in expectedIdent:
if c != lexer.stream.read():
lexer.tok = tkError
lexer.tokKind = tkError
return
lexer.tok = expectedTok
lexer.tokKind = expectedTok
proc next*(lexer: var JsonLexer) =
proc accept*(lexer: var JsonLexer)
{.gcsafe, raises: [Defect,IOError].} =
## Finalise token by parsing the value. Note that this might change
## the token type
case lexer.tokKind
of tkNumeric:
lexer.scanNumber
of tkQuoted:
lexer.scanString
else:
discard
proc next*(lexer: var JsonLexer)
{.gcsafe, raises: [Defect,IOError].} =
lexer.skipWhitespace()
if not lexer.stream.readable:
lexer.tok = tkEof
lexer.tokKind = tkEof
return
# in case the value parsing was missing
lexer.accept()
lexer.strVal.setLen 0 # release memory (if any)
let c = lexer.stream.peek()
case c
of '+', '-', '.', '0'..'9':
lexer.scanNumber()
lexer.tokKind = tkNumeric
of '"':
lexer.scanString()
lexer.tokKind = tkQuoted
of '[':
advance lexer.stream
lexer.tok = tkBracketLe
lexer.tokKind = tkBracketLe
of '{':
advance lexer.stream
lexer.tok = tkCurlyLe
lexer.tokKind = tkCurlyLe
of ']':
advance lexer.stream
lexer.tok = tkBracketRi
lexer.tokKind = tkBracketRi
of '}':
advance lexer.stream
lexer.tok = tkCurlyRi
lexer.tokKind = tkCurlyRi
of ',':
advance lexer.stream
lexer.tok = tkComma
lexer.tokKind = tkComma
of ':':
advance lexer.stream
lexer.tok = tkColon
lexer.tokKind = tkColon
of '\0':
lexer.tok = tkEof
lexer.tokKind = tkEof
of 'n': lexer.scanIdentifier("null", tkNull)
of 't': lexer.scanIdentifier("true", tkTrue)
of 'f': lexer.scanIdentifier("false", tkFalse)
else:
advance lexer.stream
lexer.tok = tkError
lexer.tokKind = tkError
proc tok*(lexer: var JsonLexer): TokKind
    {.gcsafe, raises: [Defect,IOError].} =
  ## Getter, implies full token parsing: any pending lazy token is first
  ## resolved via `accept()`, which may change the token type (e.g.
  ## `tkNumeric` becomes one of `tkInt`, `tkNegativeInt`, `tkFloat`.)
  lexer.accept
  lexer.tokKind
proc lazyTok*(lexer: JsonLexer): TokKind =
  ## Getter, returns the current (possibly preliminary) token type without
  ## resolving lazy tokens such as `tkNumeric` or `tkQuoted`.
  lexer.tokKind
proc customIntHandler*(lexer: var JsonLexer; handler: CustomIntHandler)
{.gcsafe, raises: [Defect,IOError].} =
## Apply the `handler` argument function for parsing a `tkNumeric` type
## value. This function sets the token state to `tkExInt`, `tkExNegInt`,
## or `tkError`.
proc customScan(lexer: var JsonLexer)
{.gcsafe, raises: [Defect,IOError].} =
var c = lexer.stream.peek()
handler(ord(c) - ord('0'))
c = eatDigitAndPeek() # implicit auto-return
while c.isDigit:
handler(ord(c) - ord('0'))
c = eatDigitAndPeek() # implicit auto-return
if lexer.tokKind == tkNumeric:
var sign = lexer.scanSign()
if sign != 0:
if lexer.stream.peek.isDigit:
lexer.tokKind = if 0 < sign: tkExInt else: tkExNegInt
lexer.customScan
if not lexer.stream.readable or lexer.stream.peek != '.':
return
error errCustomIntExpexted
proc customBlobHandler*(lexer: var JsonLexer; handler: CustomBlobHandler)
{.gcsafe, raises: [Defect,IOError].} =
## Apply the `handler` argument function for parsing a `tkQuoted` type
## value. This function sets the token state to `tkExBlob`, or `tkError`.
proc customScan(lexer: var JsonLexer)
{.gcsafe, raises: [Defect,IOError].} =
var what = Continue
while lexer.stream.readable:
var c = lexer.stream.peek
handler(c.byte, what)
case what
of StopBeforeByte:
break
of StopSwallowByte:
advance lexer.stream
break
of Continue:
advance lexer.stream
if lexer.tokKind == tkQuoted:
advance lexer.stream
lexer.tokKind = tkExBlob
lexer.customScan
return
error errCustomBlobExpexted
template customIntValueIt*(lexer: var JsonLexer; body: untyped): untyped =
  ## Convenience wrapper around `customIntHandler()` for parsing integers.
  ##
  ## The injected variable `it` holds the current decimal digit, fed in
  ## most significant digit first.
  ##
  ## The `body` argument represents a virtual function body. So the current
  ## digit processing can be exited with `return`.
  var handler: CustomIntHandler =
    proc(digit: int) =
      let it {.inject.} = digit
      body
  lexer.customIntHandler(handler)
template customBlobValueIt*(lexer: var JsonLexer; body: untyped): untyped =
  ## Convenience wrapper around `customBlobHandler()` for parsing any byte
  ## object. The body function needs to terminate explicitly with the
  ## typical phrase `doNext = StopSwallowByte` or with the more unusual
  ## phrase `doNext = StopBeforeByte`.
  ##
  ## The injected variable `it` holds the current `byte`; the injected
  ## variable `doNext` controls when feeding stops.
  ##
  ## The `body` argument represents a virtual function body. So the current
  ## byte processing can be exited with `return`.
  var handler: CustomBlobHandler =
    proc(c: byte; what: var CustomByteAction) =
      let it {.inject.} = c
      var doNext {.inject.} = what
      body
      what = doNext
  lexer.customBlobHandler(handler)
template customTextValueIt*(lexer: var JsonLexer; body: untyped): untyped =
  ## Convenience wrapper around `customBlobHandler()` for parsing a text
  ## terminating with a double quote character '"' (no inner double quote
  ## allowed.)
  ##
  ## The injected variable `it` holds the current character; the closing
  ## double quote is swallowed automatically and not passed to `body`.
  ##
  ## The `body` argument represents a virtual function body. So the current
  ## character processing can be exited with `return`.
  var handler: CustomBlobHandler =
    proc(c: byte; what: var CustomByteAction) =
      let it {.inject.} = c.chr
      if it == '"':
        what = StopSwallowByte
      else:
        body
  lexer.customBlobHandler(handler)

View File

@ -101,7 +101,7 @@ proc raiseUnexpectedToken*(r: JsonReader, expected: ExpectedTokenCategory)
{.noreturn.} =
var ex = new UnexpectedTokenError
ex.assignLineNumber(r)
ex.encountedToken = r.lexer.tok
ex.encountedToken = r.lexer.lazyTok
ex.expectedToken = expected
raise ex
@ -155,7 +155,7 @@ proc init*(T: type JsonReader,
proc setParsed[T: enum](e: var T, s: string) =
e = parseEnum[T](s)
proc requireToken*(r: JsonReader, tk: TokKind) =
proc requireToken*(r: var JsonReader, tk: TokKind) =
if r.lexer.tok != tk:
r.raiseUnexpectedToken case tk
of tkString: etString
@ -262,6 +262,10 @@ proc parseJsonNode(r: var JsonReader): JsonNode =
result = JsonNode(kind: JNull)
r.lexer.next()
of tkQuoted, tkExBlob, tkNumeric, tkExInt, tkExNegInt:
raiseAssert "generic type " & $r.lexer.lazyTok & " is not applicable"
proc skipSingleJsValue(r: var JsonReader) =
case r.lexer.tok
of tkCurlyLe:
@ -292,7 +296,9 @@ proc skipSingleJsValue(r: var JsonReader) =
of tkColon, tkComma, tkEof, tkError, tkBracketRi, tkCurlyRi:
r.raiseUnexpectedToken etValue
of tkString, tkInt, tkNegativeInt, tkFloat, tkTrue, tkFalse, tkNull:
of tkString, tkQuoted, tkExBlob,
tkInt, tkNegativeInt, tkFloat, tkNumeric, tkExInt, tkExNegInt,
tkTrue, tkFalse, tkNull:
r.lexer.next()
proc captureSingleJsValue(r: var JsonReader, output: var string) =
@ -335,7 +341,9 @@ proc captureSingleJsValue(r: var JsonReader, output: var string) =
of tkColon, tkComma, tkEof, tkError, tkBracketRi, tkCurlyRi:
r.raiseUnexpectedToken etValue
of tkString, tkInt, tkNegativeInt, tkFloat, tkTrue, tkFalse, tkNull:
of tkString, tkQuoted, tkExBlob,
tkInt, tkNegativeInt, tkFloat, tkNumeric, tkExInt, tkExNegInt,
tkTrue, tkFalse, tkNull:
r.lexer.next()
proc allocPtr[T](p: var ptr T) =
@ -348,7 +356,7 @@ iterator readArray*(r: var JsonReader, ElemType: typedesc): ElemType =
mixin readValue
r.skipToken tkBracketLe
if r.lexer.tok != tkBracketRi:
if r.lexer.lazyTok != tkBracketRi:
while true:
var res: ElemType
readValue(r, res)
@ -362,14 +370,14 @@ iterator readObjectFields*(r: var JsonReader,
mixin readValue
r.skipToken tkCurlyLe
if r.lexer.tok != tkCurlyRi:
if r.lexer.lazyTok != tkCurlyRi:
while true:
var key: KeyType
readValue(r, key)
if r.lexer.tok != tkColon: break
if r.lexer.lazyTok != tkColon: break
r.lexer.next()
yield key
if r.lexer.tok != tkComma: break
if r.lexer.lazyTok != tkComma: break
r.lexer.next()
r.skipToken tkCurlyRi
@ -394,10 +402,47 @@ template isCharArray(v: auto): bool = false
proc readValue*[T](r: var JsonReader, value: var T)
{.raises: [SerializationError, IOError, Defect].} =
## Master field/object parser. This function relies on customised sub-mixins for particular
## object types.
##
## Customised readValue() examples:
## ::
## type
## FancyInt = distinct int
## FancyUInt = distinct uint
##
## proc readValue(reader: var JsonReader, value: var FancyInt) =
## ## Refer to another readValue() instance
## value = reader.readValue(int).FancyInt
##
## proc readValue(reader: var JsonReader, value: var FancyUInt) =
## ## Provide a full custom version of a readValue() instance
## if reader.lexer.lazyTok == tkNumeric:
## # lazyTok: Check token before the value is available
## var accu: FancyUInt
## # custom parser (the directive `customIntValueIt()` is a
## # convenience wrapper around `customIntHandler()`.)
## reader.lexer.customIntValueIt:
## accu = accu * 10 + it.u256
## value = accu
## elif reader.lexer.lazyTok == tkQuoted:
## var accu: string
## # The following is really for demo only (inefficient,
## # lacks hex encoding)
## reader.lexer.customTextValueIt:
## accu &= it
## value = accu.parseUInt.FancyUInt
## ...
## # prepare next parser cycle
## reader.lexer.next
##
mixin readValue
type ReaderType {.used.} = type r
let tok {.used.} = r.lexer.tok
when value is (object or tuple):
let tok {.used.} = r.lexer.lazyTok
else:
let tok {.used.} = r.lexer.tok # resove lazy token
when value is JsonString:
r.captureSingleJsValue(string value)
@ -527,23 +572,32 @@ proc readValue*[T](r: var JsonReader, value: var T)
when expectedFields > 0:
let fields = T.fieldReadersTable(ReaderType)
var expectedFieldPos = 0
while r.lexer.tok == tkString:
while true:
# Have the assignment parsed of the AVP
if r.lexer.lazyTok == tkQuoted:
r.lexer.accept
if r.lexer.lazyTok != tkString:
break
# Calculate/assemble handler
when T is tuple:
var reader = fields[][expectedFieldPos].reader
expectedFieldPos += 1
else:
var reader = findFieldReader(fields[], r.lexer.strVal, expectedFieldPos)
r.lexer.next()
r.skipToken tkColon
if reader != nil:
r.lexer.next()
r.skipToken tkColon
reader(value, r)
inc readFields
elif r.allowUnknownFields:
r.skipSingleJsValue()
else:
const typeName = typetraits.name(T)
r.raiseUnexpectedField(r.lexer.strVal, typeName)
if r.lexer.tok == tkComma:
r.lexer.next()
r.skipToken tkColon
if r.allowUnknownFields:
r.skipSingleJsValue()
else:
const typeName = typetraits.name(T)
r.raiseUnexpectedField(r.lexer.strVal, typeName)
if r.lexer.lazyTok == tkComma:
r.lexer.next()
else:
break
@ -552,6 +606,7 @@ proc readValue*[T](r: var JsonReader, value: var T)
const typeName = typetraits.name(T)
r.raiseIncompleteObject(typeName)
r.lexer.accept
r.skipToken tkCurlyRi
else:

View File

@ -8,7 +8,7 @@ proc writeValue*(writer: var JsonWriter, value: Option) =
writer.writeValue JsonString("null")
proc readValue*[T](reader: var JsonReader, value: var Option[T]) =
let tok = reader.lexer.tok
let tok = reader.lexer.lazyTok
if tok == tkNull:
reset value
reader.lexer.next()

View File

@ -3,6 +3,7 @@ import
serialization/object_serialization,
serialization/testing/generic_suite,
../json_serialization, ./utils,
../json_serialization/lexer,
../json_serialization/std/[options, sets, tables]
type
@ -50,6 +51,80 @@ type
notNilStr: cstring
nilStr: cstring
# Customised parser tests
FancyInt = distinct int
FancyUInt = distinct uint
FancyText = distinct string
HasFancyInt = object
name: string
data: FancyInt
HasFancyUInt = object
name: string
data: FancyUInt
HasFancyText = object
name: string
data: FancyText
TokenRegistry = tuple
entry, exit: TokKind
dup: bool
var
customVisit: TokenRegistry
template registerVisit(reader: var JsonReader; body: untyped): untyped =
if customVisit.entry == tkError:
customVisit.entry = reader.lexer.lazyTok
body
customVisit.exit = reader.lexer.lazyTok
else:
customVisit.dup = true
# Customised parser referring to other parser
proc readValue(reader: var JsonReader, value: var FancyInt) =
reader.registerVisit:
value = reader.readValue(int).FancyInt
# Customised numeric parser for integer and stringified integer
proc readValue(reader: var JsonReader, value: var FancyUInt) =
reader.registerVisit:
var accu = 0u
case reader.lexer.lazyTok
of tkNumeric:
reader.lexer.customIntValueIt:
accu = accu * 10u + it.uint
of tkQuoted:
var s = ""
reader.lexer.customTextValueIt:
s &= it
accu = s.parseUInt
else:
discard
value = accu.FancyUInt
reader.lexer.next
# Customised numeric parser for text, accepts embedded quote
proc readValue(reader: var JsonReader, value: var FancyText) =
reader.registerVisit:
var (s, esc) = ("",false)
reader.lexer.customBlobValueIt:
let c = it.chr
if esc:
s &= c
esc = false
elif c == '\\':
esc = true
elif c != '"':
s &= c
else:
doNext = StopSwallowByte
value = s.FancyText
reader.lexer.next
# TODO `borrowSerialization` still doesn't work
# properly when it's placed in another module:
Meter.borrowSerialization int
@ -371,3 +446,67 @@ suite "toJson tests":
# clarity regarding the memory allocation approach
Json.decode("null", cstring)
suite "Custom parser tests":
test "Fall back to int parser":
customVisit = TokenRegistry.default
let
jData = test_dedent"""
{
"name": "FancyInt",
"data": -12345
}
"""
dData = Json.decode(jData, HasFancyInt)
check dData.name == "FancyInt"
check dData.data.int == -12345
check customVisit == (tkNumeric, tkCurlyRi, false)
test "Uint parser on negative integer":
customVisit = TokenRegistry.default
let
jData = test_dedent"""
{
"name": "FancyUInt",
"data": -12345
}
"""
dData = Json.decode(jData, HasFancyUInt)
check dData.name == "FancyUInt"
check dData.data.uint == 12345u # abs value
check customVisit == (tkNumeric, tkExNegInt, false)
test "Uint parser on string integer":
customVisit = TokenRegistry.default
let
jData = test_dedent"""
{
"name": "FancyUInt",
"data": "12345"
}
"""
dData = Json.decode(jData, HasFancyUInt)
check dData.name == "FancyUInt"
check dData.data.uint == 12345u
check customVisit == (tkQuoted, tkExBlob, false)
test "Parser on text blob with embedded quote (backlash escape support)":
customVisit = TokenRegistry.default
let
jData = test_dedent"""
{
"name": "FancyText",
"data": "a\bc\"\\def"
}
"""
dData = Json.decode(jData, HasFancyText)
check dData.name == "FancyText"
check dData.data.string == "abc\"\\def"
check customVisit == (tkQuoted, tkExBlob, false)