Lazy JSON parser (#42)
* Proper error handling when parsed number exceeds uint64

  details:
    Returns an "errNonPortableInt" error

* need legacy flag for unit tests

* lazy numeric token parser

  why:
    Numeric data may have a custom format. In particular, numeric data may
    be Uint256, which is not part of the JSON standard and might lead to
    an overflow.

  details:
    Numeric values are assigned a preliminary token type tkNumeric without
    being fully parsed. This can be used to insert a custom parser.
    Otherwise the value is parsed implicitly when querying/fetching the
    token type.
    + tok: replaced by getter tok() resolving lazy stuff (if necessary)
    + tokKind: current type without auto-resolving

    This lazy scheme could be extended to other custom types as long as
    the first token letter determines the custom type.

* activate lazy parsing in reader

  howto:
    + no code change if a custom reader refers to an existing reader:

        type FancyInt = distinct int
        proc readValue(reader: var JsonReader, value: var FancyInt) =
          value = reader.readValue(int).FancyInt

    + bespoke reader for custom parsing:

        type FancyUint = distinct uint
        proc readValue(reader: var JsonReader, value: var FancyUint) =
          if reader.lexer.lazyTok == tkNumeric:
            var accu: uint
            reader.lexer.customIntValueIt:
              accu = accu * 10 + it.uint
            value = accu.FancyUint
          elif reader.lexer.tok == tkString:
            value = reader.lexer.strVal.parseUint.FancyUint
          ...
          reader.lexer.next

    + full code explanation at json_serialization/reader.readValue()

* Add lazy parsing for customised string objects

  why:
    This allows parsing large or specialised strings without storing them
    in the lexer state descriptor.

  details:
    Similar logic applies as for the customised number parser. For most
    practical cases, a DSL template is available serving as a wrapper
    around the character/byte item processor code.

* fix typo in unit test
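The how-to above compresses several moving parts; the following self-contained
sketch puts them together. `FancyUInt` and `Holder` are made-up demo types and
the asserted values are illustrative; the lexer API used (`lazyTok`,
`customIntValueIt`, `customTextValueIt`) is the one introduced by this patch:

  import std/strutils
  import json_serialization, json_serialization/lexer

  type
    FancyUInt = distinct uint   # demo type only
    Holder = object
      data: FancyUInt

  proc readValue(reader: var JsonReader, value: var FancyUInt) =
    ## Accept both a JSON number and a quoted decimal string.
    case reader.lexer.lazyTok
    of tkNumeric:
      # stream the decimal digits through the custom handler
      var accu = 0u
      reader.lexer.customIntValueIt:
        accu = accu * 10u + it.uint
      value = accu.FancyUInt
    of tkQuoted:
      # collect the quoted characters, then parse them
      var s = ""
      reader.lexer.customTextValueIt:
        s.add it
      value = s.parseUInt.FancyUInt
    else:
      reader.raiseUnexpectedToken etValue
    reader.lexer.next   # prepare next parser cycle

  when isMainModule:
    doAssert Json.decode("""{"data": 12345}""", Holder).data.uint == 12345u
    doAssert Json.decode("""{"data": "67890"}""", Holder).data.uint == 67890u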
parent 074cd026e6
commit 3509706517
@@ -16,6 +16,7 @@ proc test(args, path: string) =
   mkDir "build"

   exec "nim " & getEnv("TEST_LANG", "c") & " " & getEnv("NIMFLAGS") & " " & args &
+    " -d:nimOldCaseObjects " &
     " -r --hints:off --skipParentCfg --styleCheck:usages --styleCheck:error " & path

 task test, "Run all tests":
@@ -6,7 +6,30 @@ import
 export
   inputs, types

+{.push raises: [Defect].}
+
 type
+  CustomIntHandler* = ##\
+    ## Custom decimal integer parser, result values need to be captured
+    proc(dgt: int) {.gcsafe, raises: [Defect].}
+
+  CustomByteAction* = enum
+    Continue ##\
+      ## Default initialisation when provided to a `CustomBlobHandler` parser\
+      ## function type via call-by-reference
+
+    StopBeforeByte ##\
+      ## Stop feeding and do not consume the current `byte` argument
+
+    StopSwallowByte ##\
+      ## Stop and discard current `byte` argument (e.g. the last double quote\
+      ## '"' for a genuine string parser.)
+
+  CustomBlobHandler* = ##\
+    ## Custom text or binary parser, result values need to be captured. The\
+    ## second argument `what` controls the next action.
+    proc(b: byte; what: var CustomByteAction) {.gcsafe, raises: [Defect].}
+
   TokKind* = enum
     tkError,
     tkEof,
@@ -22,7 +45,20 @@ type
     tkBracketLe,
     tkBracketRi,
     tkColon,
-    tkComma
+    tkComma,
+
+    tkQuoted, ##\
+      ## unfinished/lazy type, eventually becomes `tkString`
+    tkExBlob, ##\
+      ## externally held string value after successful custom parsing
+
+    tkNumeric, ##\
+      ## unfinished/lazy type, any of `tkInt`, `tkNegativeInt`, `tkFloat`
+    tkExInt, ##\
+      ## externally held non-negative integer value after successful custom\
+      ## parsing
+    tkExNegInt ##\
+      ## externally held negative integer value after successful custom parsing

   JsonErrorKind* = enum
     errNone = "no error",
@@ -39,6 +75,8 @@ type
     errCommentExpected = "comment expected"
     errOrphanSurrogate = "unicode surrogates must be followed by another unicode character"
     errNonPortableInt = "number is outside the range of portable values"
+    errCustomIntExpexted = "not a customised integer"
+    errCustomBlobExpexted = "not a customised quoted blob"

   JsonLexer* = object
     stream*: InputStream
@@ -48,7 +86,7 @@ type
     lineStartPos: int
     tokenStart: int

-    tok*: TokKind
+    tokKind: TokKind # formerly `tok`, now accessible by getter
     err*: JsonErrorKind

     absIntVal*: uint64 # BEWARE: negative integers will have tok == tkNegativeInt
@@ -61,9 +99,23 @@ const
     1e20, 1e21, 1e22] # TODO: this table should be much larger
     # The largest JSON number value is 1E308

-proc renderTok*(lexer: JsonLexer, output: var string) =
-  case lexer.tok
-  of tkError, tkEof:
-    discard
+# needed in renderTok()
+proc scanNumber(lexer: var JsonLexer) {.gcsafe, raises: [Defect,IOError].}
+proc scanString(lexer: var JsonLexer) {.gcsafe, raises: [Defect,IOError].}
+
+proc renderTok*(lexer: var JsonLexer, output: var string)
+    {.gcsafe, raises: [Defect,IOError].} =
+  # The lazy part
+  case lexer.tokKind
+  of tkNumeric:
+    lexer.scanNumber
+  of tkQuoted:
+    lexer.scanString
+  else:
+    discard
+  # The real stuff
+  case lexer.tokKind
+  of tkError, tkEof, tkNumeric, tkExInt, tkExNegInt, tkQuoted, tkExBlob:
+    discard
   of tkString:
     output.add '"'
@@ -101,14 +153,14 @@ template peek(s: InputStream): char =
 template read(s: InputStream): char =
   char inputs.read(s)

-proc hexCharValue(c: char): int {.inline.} =
+proc hexCharValue(c: char): int =
   case c
   of '0'..'9': ord(c) - ord('0')
   of 'a'..'f': ord(c) - ord('a') + 10
   of 'A'..'F': ord(c) - ord('A') + 10
   else: -1

-proc isDigit(c: char): bool {.inline.} =
+proc isDigit(c: char): bool =
   return (c >= '0' and c <= '9')

 proc col*(lexer: JsonLexer): int =
@@ -123,7 +175,7 @@ proc init*(T: type JsonLexer, stream: InputStream, mode = defaultJsonMode): T =
     line: 1,
     lineStartPos: 0,
     tokenStart: -1,
-    tok: tkError,
+    tokKind: tkError,
     err: errNone,
     absIntVal: uint64 0,
     floatVal: 0'f,
@@ -131,7 +183,7 @@ proc init*(T: type JsonLexer, stream: InputStream, mode = defaultJsonMode): T =

 template error(error: JsonErrorKind) {.dirty.} =
   lexer.err = error
-  lexer.tok = tkError
+  lexer.tokKind = tkError
   return

 template checkForUnexpectedEof {.dirty.} =
@@ -142,18 +194,19 @@ template requireNextChar(): char =
   checkForUnexpectedEof()
   lexer.stream.read()

-template checkForNonPortableInt(val: uint64) =
-  if lexer.mode == Portable and val > uint64(maxPortableInt):
+template checkForNonPortableInt(val: uint64; overflow: bool) =
+  if overflow or (lexer.mode == Portable and val > uint64(maxPortableInt)):
     error errNonPortableInt

-proc scanHexRune(lexer: var JsonLexer): int =
+proc scanHexRune(lexer: var JsonLexer): int
+    {.gcsafe, raises: [Defect,IOError].} =
   for i in 0..3:
     let hexValue = hexCharValue requireNextChar()
     if hexValue == -1: error errHexCharExpected
     result = (result shl 4) or hexValue

 proc scanString(lexer: var JsonLexer) =
-  lexer.tok = tkString
+  lexer.tokKind = tkString
   lexer.strVal.setLen 0
   lexer.tokenStart = lexer.stream.pos
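As background on the `Portable` check above: it rejects integers that cannot
round-trip through an IEEE-754 double, i.e. anything above 2^53 - 1 (the
JavaScript-safe bound). A quick standalone sketch, assuming `maxPortableInt`
has that value in the library's types module:

  # 2^53 - 1, the largest integer range exactly representable as float64
  const maxPortableInt = 9_007_199_254_740_991'i64

  when isMainModule:
    # still exact at the bound ...
    doAssert float64(maxPortableInt) == float64(maxPortableInt - 1) + 1.0
    # ... but one past it, adjacent integers collapse to the same float
    doAssert float64(maxPortableInt + 1) == float64(maxPortableInt + 2)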
@@ -185,13 +238,13 @@ proc scanString(lexer: var JsonLexer) =
         lexer.strVal.add '\x00'
       of 'u':
         var rune = lexer.scanHexRune()
-        if lexer.tok == tkError: return
+        if lexer.tokKind == tkError: return
         # Deal with surrogates
         if (rune and 0xfc00) == 0xd800:
           if requireNextChar() != '\\': error errOrphanSurrogate
           if requireNextChar() != 'u': error errOrphanSurrogate
           let nextRune = lexer.scanHexRune()
-          if lexer.tok == tkError: return
+          if lexer.tokKind == tkError: return
           if (nextRune and 0xfc00) == 0xdc00:
             rune = 0x10000 + (((rune - 0xd800) shl 10) or (nextRune - 0xdc00))
         lexer.strVal.add toUTF8(Rune(rune))
@@ -203,12 +256,13 @@ proc scanString(lexer: var JsonLexer) =
     else:
       lexer.strVal.add c

-proc handleLF(lexer: var JsonLexer) {.inline.} =
+proc handleLF(lexer: var JsonLexer) =
   advance lexer.stream
   lexer.line += 1
   lexer.lineStartPos = lexer.stream.pos

-proc skipWhitespace(lexer: var JsonLexer) =
+proc skipWhitespace(lexer: var JsonLexer)
+    {.gcsafe, raises: [Defect,IOError].} =
   template handleCR =
     # Beware: this is a template, because the return
     # statement has to exit `skipWhitespace`.
@@ -275,7 +329,8 @@ template eatDigitAndPeek: char =
   if not lexer.stream.readable: return
   lexer.stream.peek()

-proc scanSign(lexer: var JsonLexer): int =
+proc scanSign(lexer: var JsonLexer): int
+    {.gcsafe, raises: [Defect,IOError].} =
   # Returns +1 or -1
   # If a sign character is present, it must be followed
   # by more characters representing the number. If this
@@ -290,16 +345,35 @@ proc scanSign(lexer: var JsonLexer): int =
     advance lexer.stream
   return 1

-proc scanInt(lexer: var JsonLexer): uint64 =
+proc scanInt(lexer: var JsonLexer): (uint64,bool)
+    {.gcsafe, raises: [Defect,IOError].} =
+  ## Scan unsigned integer into uint64 if possible.
+  ## If all goes ok, the tuple `(parsed-value,false)` is returned.
+  ## On overflow, the tuple `(uint64.high,true)` is returned.
   var c = lexer.stream.peek()
-  result = uint64(ord(c) - ord('0'))

-  c = eatDigitAndPeek()
+  # Always possible to append digit `9` if result[0] is not larger
+  const canAppendDigit9 = (uint64.high - 9) div 10
+
+  result[0] = uint64(ord(c) - ord('0'))
+
+  c = eatDigitAndPeek() # implicit auto-return
   while c.isDigit:
-    result = result * 10 + uint64(ord(c) - ord('0'))
-    c = eatDigitAndPeek()
+    # Process next digit unless overflow
+    if not result[1]:
+      let lsDgt = uint64(ord(c) - ord('0'))
+      if canAppendDigit9 < result[0] and
+         (uint64.high - lsDgt) div 10 < result[0]:
+        result[1] = true
+        result[0] = uint64.high
+      else:
+        result[0] = result[0] * 10 + lsDgt
+    # Fetch next digit
+    c = eatDigitAndPeek() # implicit auto-return

-proc scanNumber(lexer: var JsonLexer) =
+proc scanNumber(lexer: var JsonLexer)
+    {.gcsafe, raises: [Defect,IOError].} =
   var sign = lexer.scanSign()
   if sign == 0: return
   var c = lexer.stream.peek()
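A note on the overflow arithmetic above: `canAppendDigit9` is the largest
accumulator to which any digit (even 9) can be appended without exceeding
`uint64.high`; only above that fast bound is the exact per-digit limit
consulted. A standalone sketch of the predicate (the `mulAddOverflows` name
is invented for illustration):

  const canAppendDigit9 = (uint64.high - 9) div 10

  proc mulAddOverflows(acc, dgt: uint64): bool =
    ## Would `acc * 10 + dgt` exceed uint64.high?
    canAppendDigit9 < acc and (uint64.high - dgt) div 10 < acc

  when isMainModule:
    # uint64.high = 18446744073709551615 = 1844674407370955161 * 10 + 5
    doAssert not mulAddOverflows(1844674407370955161u64, 5)
    doAssert mulAddOverflows(1844674407370955161u64, 6)
    doAssert not mulAddOverflows(canAppendDigit9, 9)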
@@ -307,18 +381,18 @@ proc scanNumber(lexer: var JsonLexer) =
   if c == '.':
     advance lexer.stream
     requireMoreNumberChars: discard
-    lexer.tok = tkFloat
+    lexer.tokKind = tkFloat
     c = lexer.stream.peek()
   elif c.isDigit:
-    lexer.tok = if sign > 0: tkInt
-                else: tkNegativeInt
-    let scannedValue = lexer.scanInt()
-    checkForNonPortableInt scannedValue
+    lexer.tokKind = if sign > 0: tkInt
+                    else: tkNegativeInt
+    let (scannedValue,overflow) = lexer.scanInt()
+    checkForNonPortableInt scannedValue, overflow
     lexer.absIntVal = scannedValue
     if not lexer.stream.readable: return
     c = lexer.stream.peek()
     if c == '.':
-      lexer.tok = tkFloat
+      lexer.tokKind = tkFloat
       lexer.floatVal = float(lexer.absIntVal) * float(sign)
       c = eatDigitAndPeek()
   else:
@@ -338,7 +412,7 @@ proc scanNumber(lexer: var JsonLexer) =
     if not isDigit lexer.stream.peek():
       error errNumberExpected

-    let exponent = lexer.scanInt()
+    let (exponent,_) = lexer.scanInt()
     if exponent >= uint64(len(powersOfTen)):
       error errExponentTooLarge
@@ -351,47 +425,170 @@ proc scanIdentifier(lexer: var JsonLexer,
                     expectedIdent: string, expectedTok: TokKind) =
   for c in expectedIdent:
     if c != lexer.stream.read():
-      lexer.tok = tkError
+      lexer.tokKind = tkError
       return
-  lexer.tok = expectedTok
+  lexer.tokKind = expectedTok

-proc next*(lexer: var JsonLexer) =
+proc accept*(lexer: var JsonLexer)
+    {.gcsafe, raises: [Defect,IOError].} =
+  ## Finalise token by parsing the value. Note that this might change
+  ## the token type
+  case lexer.tokKind
+  of tkNumeric:
+    lexer.scanNumber
+  of tkQuoted:
+    lexer.scanString
+  else:
+    discard
+
+proc next*(lexer: var JsonLexer)
+    {.gcsafe, raises: [Defect,IOError].} =
   lexer.skipWhitespace()

   if not lexer.stream.readable:
-    lexer.tok = tkEof
+    lexer.tokKind = tkEof
     return

+  # in case the value parsing was missing
+  lexer.accept()
+  lexer.strVal.setLen 0 # release memory (if any)
+
   let c = lexer.stream.peek()
   case c
   of '+', '-', '.', '0'..'9':
-    lexer.scanNumber()
+    lexer.tokKind = tkNumeric
   of '"':
-    lexer.scanString()
+    lexer.tokKind = tkQuoted
   of '[':
     advance lexer.stream
-    lexer.tok = tkBracketLe
+    lexer.tokKind = tkBracketLe
   of '{':
     advance lexer.stream
-    lexer.tok = tkCurlyLe
+    lexer.tokKind = tkCurlyLe
   of ']':
     advance lexer.stream
-    lexer.tok = tkBracketRi
+    lexer.tokKind = tkBracketRi
   of '}':
     advance lexer.stream
-    lexer.tok = tkCurlyRi
+    lexer.tokKind = tkCurlyRi
   of ',':
     advance lexer.stream
-    lexer.tok = tkComma
+    lexer.tokKind = tkComma
   of ':':
     advance lexer.stream
-    lexer.tok = tkColon
+    lexer.tokKind = tkColon
   of '\0':
-    lexer.tok = tkEof
+    lexer.tokKind = tkEof
   of 'n': lexer.scanIdentifier("null", tkNull)
   of 't': lexer.scanIdentifier("true", tkTrue)
   of 'f': lexer.scanIdentifier("false", tkFalse)
   else:
     advance lexer.stream
-    lexer.tok = tkError
+    lexer.tokKind = tkError
+
+proc tok*(lexer: var JsonLexer): TokKind
+    {.gcsafe, raises: [Defect,IOError].} =
+  ## Getter, implies full token parsing
+  lexer.accept
+  lexer.tokKind
+
+proc lazyTok*(lexer: JsonLexer): TokKind =
+  ## Preliminary token state unless already accepted
+  lexer.tokKind
+
+
+proc customIntHandler*(lexer: var JsonLexer; handler: CustomIntHandler)
+    {.gcsafe, raises: [Defect,IOError].} =
+  ## Apply the `handler` argument function for parsing a `tkNumeric` type
+  ## value. This function sets the token state to `tkExInt`, `tkExNegInt`,
+  ## or `tkError`.
+  proc customScan(lexer: var JsonLexer)
+      {.gcsafe, raises: [Defect,IOError].} =
+    var c = lexer.stream.peek()
+    handler(ord(c) - ord('0'))
+    c = eatDigitAndPeek() # implicit auto-return
+    while c.isDigit:
+      handler(ord(c) - ord('0'))
+      c = eatDigitAndPeek() # implicit auto-return
+
+  if lexer.tokKind == tkNumeric:
+    var sign = lexer.scanSign()
+    if sign != 0:
+      if lexer.stream.peek.isDigit:
+        lexer.tokKind = if 0 < sign: tkExInt else: tkExNegInt
+        lexer.customScan
+        if not lexer.stream.readable or lexer.stream.peek != '.':
+          return
+
+  error errCustomIntExpexted
+
+proc customBlobHandler*(lexer: var JsonLexer; handler: CustomBlobHandler)
+    {.gcsafe, raises: [Defect,IOError].} =
+  ## Apply the `handler` argument function for parsing a `tkQuoted` type
+  ## value. This function sets the token state to `tkExBlob`, or `tkError`.
+  proc customScan(lexer: var JsonLexer)
+      {.gcsafe, raises: [Defect,IOError].} =
+    var what = Continue
+    while lexer.stream.readable:
+      var c = lexer.stream.peek
+      handler(c.byte, what)
+      case what
+      of StopBeforeByte:
+        break
+      of StopSwallowByte:
+        advance lexer.stream
+        break
+      of Continue:
+        advance lexer.stream
+
+  if lexer.tokKind == tkQuoted:
+    advance lexer.stream
+    lexer.tokKind = tkExBlob
+    lexer.customScan
+    return
+
+  error errCustomBlobExpexted
+
+
+template customIntValueIt*(lexer: var JsonLexer; body: untyped): untyped =
+  ## Convenience wrapper around `customIntHandler()` for parsing integers.
+  ##
+  ## The `body` argument represents a virtual function body. So the current
+  ## digit processing can be exited with `return`.
+  var handler: CustomIntHandler =
+    proc(digit: int) =
+      let it {.inject.} = digit
+      body
+  lexer.customIntHandler(handler)
+
+template customBlobValueIt*(lexer: var JsonLexer; body: untyped): untyped =
+  ## Convenience wrapper around `customBlobHandler()` for parsing any byte
+  ## object. The body function needs to terminate explicitly with the typical
+  ## phrase `doNext = StopSwallowByte` or with the more unusual phrase
+  ## `doNext = StopBeforeByte`.
+  ##
+  ## The `body` argument represents a virtual function body. So the current
+  ## byte processing can be exited with `return`.
+  var handler: CustomBlobHandler =
+    proc(c: byte; what: var CustomByteAction) =
+      let it {.inject.} = c
+      var doNext {.inject.} = what
+      body
+      what = doNext
+  lexer.customBlobHandler(handler)
+
+template customTextValueIt*(lexer: var JsonLexer; body: untyped): untyped =
+  ## Convenience wrapper around `customBlobHandler()` for parsing a text
+  ## terminating with a double quote character '"' (no inner double quote
+  ## allowed.)
+  ##
+  ## The `body` argument represents a virtual function body. So the current
+  ## character processing can be exited with `return`.
+  var handler: CustomBlobHandler =
+    proc(c: byte; what: var CustomByteAction) =
+      let it {.inject.} = c.chr
+      if it == '"':
+        what = StopSwallowByte
+      else:
+        body
+  lexer.customBlobHandler(handler)
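Usage-wise, the three `...ValueIt` templates above only inject `it` (and, for
the blob variant, `doNext`) into a handler closure; the same scan can be
written against `customBlobHandler` directly. A minimal sketch (the `rawScan`
helper is illustrative, not library code):

  import json_serialization/lexer

  # Collect the bytes of a quoted value up to the closing '"', which is
  # consumed but not stored (StopSwallowByte).
  proc rawScan(lexer: var JsonLexer): string =
    var collected = ""
    var handler: CustomBlobHandler =
      proc(b: byte; what: var CustomByteAction) =
        if b.char == '"':
          what = StopSwallowByte
        else:
          collected.add b.char
    lexer.customBlobHandler(handler)
    collected

`customTextValueIt` generates essentially this handler, with the user-supplied
`body` spliced in where `collected.add` appears.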
@@ -101,7 +101,7 @@ proc raiseUnexpectedToken*(r: JsonReader, expected: ExpectedTokenCategory)
     {.noreturn.} =
   var ex = new UnexpectedTokenError
   ex.assignLineNumber(r)
-  ex.encountedToken = r.lexer.tok
+  ex.encountedToken = r.lexer.lazyTok
   ex.expectedToken = expected
   raise ex
@@ -155,7 +155,7 @@ proc init*(T: type JsonReader,
 proc setParsed[T: enum](e: var T, s: string) =
   e = parseEnum[T](s)

-proc requireToken*(r: JsonReader, tk: TokKind) =
+proc requireToken*(r: var JsonReader, tk: TokKind) =
   if r.lexer.tok != tk:
     r.raiseUnexpectedToken case tk
       of tkString: etString
@@ -262,6 +262,10 @@ proc parseJsonNode(r: var JsonReader): JsonNode =
     result = JsonNode(kind: JNull)
     r.lexer.next()

+  of tkQuoted, tkExBlob, tkNumeric, tkExInt, tkExNegInt:
+    raiseAssert "generic type " & $r.lexer.lazyTok & " is not applicable"
+

 proc skipSingleJsValue(r: var JsonReader) =
   case r.lexer.tok
   of tkCurlyLe:
@@ -292,7 +296,9 @@ proc skipSingleJsValue(r: var JsonReader) =
   of tkColon, tkComma, tkEof, tkError, tkBracketRi, tkCurlyRi:
     r.raiseUnexpectedToken etValue

-  of tkString, tkInt, tkNegativeInt, tkFloat, tkTrue, tkFalse, tkNull:
+  of tkString, tkQuoted, tkExBlob,
+     tkInt, tkNegativeInt, tkFloat, tkNumeric, tkExInt, tkExNegInt,
+     tkTrue, tkFalse, tkNull:
     r.lexer.next()

 proc captureSingleJsValue(r: var JsonReader, output: var string) =
@@ -335,7 +341,9 @@ proc captureSingleJsValue(r: var JsonReader, output: var string) =
   of tkColon, tkComma, tkEof, tkError, tkBracketRi, tkCurlyRi:
     r.raiseUnexpectedToken etValue

-  of tkString, tkInt, tkNegativeInt, tkFloat, tkTrue, tkFalse, tkNull:
+  of tkString, tkQuoted, tkExBlob,
+     tkInt, tkNegativeInt, tkFloat, tkNumeric, tkExInt, tkExNegInt,
+     tkTrue, tkFalse, tkNull:
     r.lexer.next()

 proc allocPtr[T](p: var ptr T) =
@@ -348,7 +356,7 @@ iterator readArray*(r: var JsonReader, ElemType: typedesc): ElemType =
   mixin readValue

   r.skipToken tkBracketLe
-  if r.lexer.tok != tkBracketRi:
+  if r.lexer.lazyTok != tkBracketRi:
     while true:
       var res: ElemType
       readValue(r, res)
@@ -362,14 +370,14 @@ iterator readObjectFields*(r: var JsonReader,
   mixin readValue

   r.skipToken tkCurlyLe
-  if r.lexer.tok != tkCurlyRi:
+  if r.lexer.lazyTok != tkCurlyRi:
     while true:
       var key: KeyType
       readValue(r, key)
-      if r.lexer.tok != tkColon: break
+      if r.lexer.lazyTok != tkColon: break
       r.lexer.next()
       yield key
-      if r.lexer.tok != tkComma: break
+      if r.lexer.lazyTok != tkComma: break
       r.lexer.next()
   r.skipToken tkCurlyRi
@@ -394,10 +402,47 @@ template isCharArray(v: auto): bool = false

 proc readValue*[T](r: var JsonReader, value: var T)
     {.raises: [SerializationError, IOError, Defect].} =
+  ## Master field/object parser. This function relies on customised
+  ## sub-mixins for particular object types.
+  ##
+  ## Customised readValue() examples:
+  ## ::
+  ##   type
+  ##     FancyInt = distinct int
+  ##     FancyUInt = distinct uint
+  ##
+  ##   proc readValue(reader: var JsonReader, value: var FancyInt) =
+  ##     ## Refer to another readValue() instance
+  ##     value = reader.readValue(int).FancyInt
+  ##
+  ##   proc readValue(reader: var JsonReader, value: var FancyUInt) =
+  ##     ## Provide a full custom version of a readValue() instance
+  ##     if reader.lexer.lazyTok == tkNumeric:
+  ##       # lazyTok: check token before the value is available
+  ##       var accu: uint
+  ##       # custom parser (the directive `customIntValueIt()` is a
+  ##       # convenience wrapper around `customIntHandler()`.)
+  ##       reader.lexer.customIntValueIt:
+  ##         accu = accu * 10 + it.uint
+  ##       value = accu.FancyUInt
+  ##     elif reader.lexer.lazyTok == tkQuoted:
+  ##       var accu: string
+  ##       # the following is really for demo only (inefficient,
+  ##       # lacks hex encoding)
+  ##       reader.lexer.customTextValueIt:
+  ##         accu &= it
+  ##       value = accu.parseUInt.FancyUInt
+  ##     ...
+  ##     # prepare next parser cycle
+  ##     reader.lexer.next
+  ##
   mixin readValue
   type ReaderType {.used.} = type r

-  let tok {.used.} = r.lexer.tok
+  when value is (object or tuple):
+    let tok {.used.} = r.lexer.lazyTok
+  else:
+    let tok {.used.} = r.lexer.tok # resolve lazy token

   when value is JsonString:
     r.captureSingleJsValue(string value)
@@ -527,23 +572,32 @@ proc readValue*[T](r: var JsonReader, value: var T)
       when expectedFields > 0:
         let fields = T.fieldReadersTable(ReaderType)
         var expectedFieldPos = 0
-        while r.lexer.tok == tkString:
+        while true:
+          # Have the field name of the AVP (attribute/value pair) parsed
+          if r.lexer.lazyTok == tkQuoted:
+            r.lexer.accept
+          if r.lexer.lazyTok != tkString:
+            break
+          # Calculate/assemble handler
           when T is tuple:
             var reader = fields[][expectedFieldPos].reader
             expectedFieldPos += 1
           else:
             var reader = findFieldReader(fields[], r.lexer.strVal, expectedFieldPos)
-          r.lexer.next()
-          r.skipToken tkColon
           if reader != nil:
+            r.lexer.next()
+            r.skipToken tkColon
             reader(value, r)
             inc readFields
-          elif r.allowUnknownFields:
-            r.skipSingleJsValue()
           else:
-            const typeName = typetraits.name(T)
-            r.raiseUnexpectedField(r.lexer.strVal, typeName)
-          if r.lexer.tok == tkComma:
             r.lexer.next()
+            r.skipToken tkColon
+            if r.allowUnknownFields:
+              r.skipSingleJsValue()
+            else:
+              const typeName = typetraits.name(T)
+              r.raiseUnexpectedField(r.lexer.strVal, typeName)
+          if r.lexer.lazyTok == tkComma:
+            r.lexer.next()
           else:
             break
@@ -552,6 +606,7 @@ proc readValue*[T](r: var JsonReader, value: var T)
         const typeName = typetraits.name(T)
         r.raiseIncompleteObject(typeName)

+      r.lexer.accept
       r.skipToken tkCurlyRi

   else:
@@ -8,7 +8,7 @@ proc writeValue*(writer: var JsonWriter, value: Option) =
     writer.writeValue JsonString("null")

 proc readValue*[T](reader: var JsonReader, value: var Option[T]) =
-  let tok = reader.lexer.tok
+  let tok = reader.lexer.lazyTok
   if tok == tkNull:
     reset value
     reader.lexer.next()
@@ -3,6 +3,7 @@ import
   serialization/object_serialization,
   serialization/testing/generic_suite,
   ../json_serialization, ./utils,
+  ../json_serialization/lexer,
   ../json_serialization/std/[options, sets, tables]

 type
@@ -50,6 +51,80 @@ type
     notNilStr: cstring
     nilStr: cstring

+  # Customised parser tests
+  FancyInt = distinct int
+  FancyUInt = distinct uint
+  FancyText = distinct string
+
+  HasFancyInt = object
+    name: string
+    data: FancyInt
+
+  HasFancyUInt = object
+    name: string
+    data: FancyUInt
+
+  HasFancyText = object
+    name: string
+    data: FancyText
+
+  TokenRegistry = tuple
+    entry, exit: TokKind
+    dup: bool
+
+var
+  customVisit: TokenRegistry
+
+template registerVisit(reader: var JsonReader; body: untyped): untyped =
+  if customVisit.entry == tkError:
+    customVisit.entry = reader.lexer.lazyTok
+    body
+    customVisit.exit = reader.lexer.lazyTok
+  else:
+    customVisit.dup = true
+
+# Customised parser referring to other parser
+proc readValue(reader: var JsonReader, value: var FancyInt) =
+  reader.registerVisit:
+    value = reader.readValue(int).FancyInt
+
+# Customised numeric parser for integer and stringified integer
+proc readValue(reader: var JsonReader, value: var FancyUInt) =
+  reader.registerVisit:
+    var accu = 0u
+    case reader.lexer.lazyTok
+    of tkNumeric:
+      reader.lexer.customIntValueIt:
+        accu = accu * 10u + it.uint
+    of tkQuoted:
+      var s = ""
+      reader.lexer.customTextValueIt:
+        s &= it
+      accu = s.parseUInt
+    else:
+      discard
+    value = accu.FancyUInt
+    reader.lexer.next
+
+# Customised text parser, accepts embedded quote
+proc readValue(reader: var JsonReader, value: var FancyText) =
+  reader.registerVisit:
+    var (s, esc) = ("", false)
+    reader.lexer.customBlobValueIt:
+      let c = it.chr
+      if esc:
+        s &= c
+        esc = false
+      elif c == '\\':
+        esc = true
+      elif c != '"':
+        s &= c
+      else:
+        doNext = StopSwallowByte
+    value = s.FancyText
+    reader.lexer.next
+
+
 # TODO `borrowSerialization` still doesn't work
 # properly when it's placed in another module:
 Meter.borrowSerialization int
@@ -371,3 +446,67 @@ suite "toJson tests":
     # clarity regarding the memory allocation approach
     Json.decode("null", cstring)

+suite "Custom parser tests":
+  test "Fall back to int parser":
+    customVisit = TokenRegistry.default
+
+    let
+      jData = test_dedent"""
+        {
+          "name": "FancyInt",
+          "data": -12345
+        }
+        """
+      dData = Json.decode(jData, HasFancyInt)
+
+    check dData.name == "FancyInt"
+    check dData.data.int == -12345
+    check customVisit == (tkNumeric, tkCurlyRi, false)
+
+  test "Uint parser on negative integer":
+    customVisit = TokenRegistry.default
+
+    let
+      jData = test_dedent"""
+        {
+          "name": "FancyUInt",
+          "data": -12345
+        }
+        """
+      dData = Json.decode(jData, HasFancyUInt)
+
+    check dData.name == "FancyUInt"
+    check dData.data.uint == 12345u # abs value
+    check customVisit == (tkNumeric, tkExNegInt, false)
+
+  test "Uint parser on string integer":
+    customVisit = TokenRegistry.default
+
+    let
+      jData = test_dedent"""
+        {
+          "name": "FancyUInt",
+          "data": "12345"
+        }
+        """
+      dData = Json.decode(jData, HasFancyUInt)
+
+    check dData.name == "FancyUInt"
+    check dData.data.uint == 12345u
+    check customVisit == (tkQuoted, tkExBlob, false)
+
+  test "Parser on text blob with embedded quote (backslash escape support)":
+    customVisit = TokenRegistry.default
+
+    let
+      jData = test_dedent"""
+        {
+          "name": "FancyText",
+          "data": "a\bc\"\\def"
+        }
+        """
+      dData = Json.decode(jData, HasFancyText)
+
+    check dData.name == "FancyText"
+    check dData.data.string == "abc\"\\def"
+    check customVisit == (tkQuoted, tkExBlob, false)