Lazy JSON parser (#42)
* Proper error handling when parsed number exceeds uint64

  details:
    Returns an "errNonPortableInt" error

* need legacy flag for unit tests

* lazy numeric token parser

  why:
    Numeric data may have a custom format. In particular, numeric data may
    be Uint256, which is not part of the JSON standard and might lead to
    an overflow.

  details:
    Numeric values are assigned a preliminary token type tkNumeric without
    being fully parsed. This can be used to insert a custom parser.
    Otherwise the value is parsed implicitly when querying/fetching the
    token type.
    + tok: replaced by getter tok() resolving lazy stuff (if necessary)
    + tokKind: current type without auto-resolving

    This lazy scheme could be extended to other custom types as long as
    the first token letter determines the custom type.

* activate lazy parsing in reader

  howto:
    + no code change if a custom reader refers to an existing reader:

        type FancyInt = distinct int
        proc readValue(reader: var JsonReader, value: var FancyInt) =
          value = reader.readValue(int).FancyInt

    + bespoke reader for custom parsing:

        type FancyUint = distinct uint
        proc readValue(reader: var JsonReader, value: var FancyUint) =
          if reader.lexer.lazyTok == tkNumeric:
            var accu: uint
            reader.lexer.customIntValueIt:
              accu = accu * 10 + it.uint
            value = accu.FancyUint
          elif reader.lexer.tok == tkString:
            value = reader.lexer.strVal.parseUint.FancyUint
          ...
          reader.lexer.next

    + full code explanation at json_serialization/reader.readValue()

* Add lazy parsing for customised string objects

  why:
    This allows parsing large or specialised strings without storing them
    in the lexer state descriptor.

  details:
    Similar logic applies as for the customised number parser. For most
    practical cases, a DSL template is available serving as a wrapper
    around the character/byte item processor code.

* fix typo in unit test
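The how-to above compresses several moving parts; the following self-contained
sketch puts them together. `FancyUInt` and `Holder` are made-up demo types and
the asserted values are illustrative; the lexer API used (`lazyTok`,
`customIntValueIt`, `customTextValueIt`) is the one introduced by this patch:

  import std/strutils
  import json_serialization, json_serialization/lexer

  type
    FancyUInt = distinct uint   # demo type only
    Holder = object
      data: FancyUInt

  proc readValue(reader: var JsonReader, value: var FancyUInt) =
    ## Accept both a JSON number and a quoted decimal string.
    case reader.lexer.lazyTok
    of tkNumeric:
      # stream the decimal digits through the custom handler
      var accu = 0u
      reader.lexer.customIntValueIt:
        accu = accu * 10u + it.uint
      value = accu.FancyUInt
    of tkQuoted:
      # collect the quoted characters, then parse them
      var s = ""
      reader.lexer.customTextValueIt:
        s.add it
      value = s.parseUInt.FancyUInt
    else:
      reader.raiseUnexpectedToken etValue
    reader.lexer.next   # prepare next parser cycle

  when isMainModule:
    doAssert Json.decode("""{"data": 12345}""", Holder).data.uint == 12345u
    doAssert Json.decode("""{"data": "67890"}""", Holder).data.uint == 67890u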
parent 074cd026e6
commit 3509706517
@@ -16,6 +16,7 @@ proc test(args, path: string) =
   mkDir "build"

   exec "nim " & getEnv("TEST_LANG", "c") & " " & getEnv("NIMFLAGS") & " " & args &
+    " -d:nimOldCaseObjects " &
     " -r --hints:off --skipParentCfg --styleCheck:usages --styleCheck:error " & path

 task test, "Run all tests":
@@ -6,7 +6,30 @@ import
 export
   inputs, types

+{.push raises: [Defect].}
+
 type
+  CustomIntHandler* = ##\
+    ## Custom decimal integer parser, result values need to be captured
+    proc(dgt: int) {.gcsafe, raises: [Defect].}
+
+  CustomByteAction* = enum
+    Continue ##\
+      ## Default initialisation when provided to a `CustomBlobHandler` parser\
+      ## function type via call-by-reference
+
+    StopBeforeByte ##\
+      ## Stop feeding and do not consume the current `byte` argument
+
+    StopSwallowByte ##\
+      ## Stop and discard current `byte` argument (e.g. the last double quote\
+      ## '"' for a genuine string parser.)
+
+  CustomBlobHandler* = ##\
+    ## Custom text or binary parser, result values need to be captured. The\
+    ## second argument `what` controls the next action.
+    proc(b: byte; what: var CustomByteAction) {.gcsafe, raises: [Defect].}
+
   TokKind* = enum
     tkError,
     tkEof,
@@ -22,7 +45,20 @@ type
     tkBracketLe,
     tkBracketRi,
     tkColon,
-    tkComma
+    tkComma,
+
+    tkQuoted, ##\
+      ## unfinished/lazy type, eventually becomes `tkString`
+    tkExBlob, ##\
+      ## externally held string value after successful custom parsing
+
+    tkNumeric, ##\
+      ## unfinished/lazy type, any of `tkInt`, `tkNegativeInt`, `tkFloat`
+    tkExInt, ##\
+      ## externally held non-negative integer value after successful custom\
+      ## parsing
+    tkExNegInt ##\
+      ## externally held negative integer value after successful custom parsing

   JsonErrorKind* = enum
     errNone = "no error",
@@ -39,6 +75,8 @@ type
     errCommentExpected = "comment expected"
     errOrphanSurrogate = "unicode surrogates must be followed by another unicode character"
     errNonPortableInt = "number is outside the range of portable values"
+    errCustomIntExpexted = "not a customised integer"
+    errCustomBlobExpexted = "not a customised quoted blob"

   JsonLexer* = object
     stream*: InputStream
@@ -48,7 +86,7 @@ type
     lineStartPos: int
     tokenStart: int

-    tok*: TokKind
+    tokKind: TokKind # formerly `tok`, now accessible by getter
     err*: JsonErrorKind

     absIntVal*: uint64 # BEWARE: negative integers will have tok == tkNegativeInt
@@ -61,9 +99,23 @@ const
     1e20, 1e21, 1e22] # TODO: this table should be much larger
     # The largest JSON number value is 1E308

-proc renderTok*(lexer: JsonLexer, output: var string) =
-  case lexer.tok
-  of tkError, tkEof:
-    discard
+# needed in renderTok()
+proc scanNumber(lexer: var JsonLexer) {.gcsafe, raises: [Defect,IOError].}
+proc scanString(lexer: var JsonLexer) {.gcsafe, raises: [Defect,IOError].}
+
+proc renderTok*(lexer: var JsonLexer, output: var string)
+    {.gcsafe, raises: [Defect,IOError].} =
+  # The lazy part
+  case lexer.tokKind
+  of tkNumeric:
+    lexer.scanNumber
+  of tkQuoted:
+    lexer.scanString
+  else:
+    discard
+  # The real stuff
+  case lexer.tokKind
+  of tkError, tkEof, tkNumeric, tkExInt, tkExNegInt, tkQuoted, tkExBlob:
+    discard
   of tkString:
     output.add '"'
@@ -101,14 +153,14 @@ template peek(s: InputStream): char =
 template read(s: InputStream): char =
   char inputs.read(s)

-proc hexCharValue(c: char): int {.inline.} =
+proc hexCharValue(c: char): int =
   case c
   of '0'..'9': ord(c) - ord('0')
   of 'a'..'f': ord(c) - ord('a') + 10
   of 'A'..'F': ord(c) - ord('A') + 10
   else: -1

-proc isDigit(c: char): bool {.inline.} =
+proc isDigit(c: char): bool =
   return (c >= '0' and c <= '9')

 proc col*(lexer: JsonLexer): int =
@@ -123,7 +175,7 @@ proc init*(T: type JsonLexer, stream: InputStream, mode = defaultJsonMode): T =
     line: 1,
     lineStartPos: 0,
     tokenStart: -1,
-    tok: tkError,
+    tokKind: tkError,
     err: errNone,
     absIntVal: uint64 0,
     floatVal: 0'f,
@@ -131,7 +183,7 @@ proc init*(T: type JsonLexer, stream: InputStream, mode = defaultJsonMode): T =

 template error(error: JsonErrorKind) {.dirty.} =
   lexer.err = error
-  lexer.tok = tkError
+  lexer.tokKind = tkError
   return

 template checkForUnexpectedEof {.dirty.} =
@@ -142,18 +194,19 @@ template requireNextChar(): char =
   checkForUnexpectedEof()
   lexer.stream.read()

-template checkForNonPortableInt(val: uint64) =
-  if lexer.mode == Portable and val > uint64(maxPortableInt):
+template checkForNonPortableInt(val: uint64; overflow: bool) =
+  if overflow or (lexer.mode == Portable and val > uint64(maxPortableInt)):
     error errNonPortableInt

-proc scanHexRune(lexer: var JsonLexer): int =
+proc scanHexRune(lexer: var JsonLexer): int
+    {.gcsafe, raises: [Defect,IOError].} =
   for i in 0..3:
     let hexValue = hexCharValue requireNextChar()
     if hexValue == -1: error errHexCharExpected
     result = (result shl 4) or hexValue

 proc scanString(lexer: var JsonLexer) =
-  lexer.tok = tkString
+  lexer.tokKind = tkString
   lexer.strVal.setLen 0
   lexer.tokenStart = lexer.stream.pos
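As background on the `Portable` check above: it rejects integers that cannot
round-trip through an IEEE-754 double, i.e. anything above 2^53 - 1 (the
JavaScript-safe bound). A quick standalone sketch, assuming `maxPortableInt`
has that value in the library's types module:

  # 2^53 - 1, the largest integer range exactly representable as float64
  const maxPortableInt = 9_007_199_254_740_991'i64

  when isMainModule:
    # still exact at the bound ...
    doAssert float64(maxPortableInt) == float64(maxPortableInt - 1) + 1.0
    # ... but one past it, adjacent integers collapse to the same float
    doAssert float64(maxPortableInt + 1) == float64(maxPortableInt + 2)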
@@ -185,13 +238,13 @@ proc scanString(lexer: var JsonLexer) =
         lexer.strVal.add '\x00'
       of 'u':
         var rune = lexer.scanHexRune()
-        if lexer.tok == tkError: return
+        if lexer.tokKind == tkError: return
         # Deal with surrogates
         if (rune and 0xfc00) == 0xd800:
           if requireNextChar() != '\\': error errOrphanSurrogate
           if requireNextChar() != 'u': error errOrphanSurrogate
           let nextRune = lexer.scanHexRune()
-          if lexer.tok == tkError: return
+          if lexer.tokKind == tkError: return
           if (nextRune and 0xfc00) == 0xdc00:
             rune = 0x10000 + (((rune - 0xd800) shl 10) or (nextRune - 0xdc00))
         lexer.strVal.add toUTF8(Rune(rune))
@@ -203,12 +256,13 @@ proc scanString(lexer: var JsonLexer) =
     else:
       lexer.strVal.add c

-proc handleLF(lexer: var JsonLexer) {.inline.} =
+proc handleLF(lexer: var JsonLexer) =
   advance lexer.stream
   lexer.line += 1
   lexer.lineStartPos = lexer.stream.pos

-proc skipWhitespace(lexer: var JsonLexer) =
+proc skipWhitespace(lexer: var JsonLexer)
+    {.gcsafe, raises: [Defect,IOError].} =
   template handleCR =
     # Beware: this is a template, because the return
     # statement has to exit `skipWhitespace`.
@@ -275,7 +329,8 @@ template eatDigitAndPeek: char =
   if not lexer.stream.readable: return
   lexer.stream.peek()

-proc scanSign(lexer: var JsonLexer): int =
+proc scanSign(lexer: var JsonLexer): int
+    {.gcsafe, raises: [Defect,IOError].} =
   # Returns +1 or -1
   # If a sign character is present, it must be followed
   # by more characters representing the number. If this
@@ -290,16 +345,35 @@ proc scanSign(lexer: var JsonLexer): int =
     advance lexer.stream
   return 1

-proc scanInt(lexer: var JsonLexer): uint64 =
+proc scanInt(lexer: var JsonLexer): (uint64,bool)
+    {.gcsafe, raises: [Defect,IOError].} =
+  ## Scan unsigned integer into uint64 if possible.
+  ## If all goes ok, the tuple `(parsed-value,false)` is returned.
+  ## On overflow, the tuple `(uint64.high,true)` is returned.
   var c = lexer.stream.peek()
-  result = uint64(ord(c) - ord('0'))

-  c = eatDigitAndPeek()
+  # Always possible to append digit `9` if result[0] is not larger
+  const canAppendDigit9 = (uint64.high - 9) div 10
+
+  result[0] = uint64(ord(c) - ord('0'))
+
+  c = eatDigitAndPeek() # implicit auto-return
   while c.isDigit:
-    result = result * 10 + uint64(ord(c) - ord('0'))
-    c = eatDigitAndPeek()
+    # Process next digit unless overflow
+    if not result[1]:
+      let lsDgt = uint64(ord(c) - ord('0'))
+      if canAppendDigit9 < result[0] and
+         (uint64.high - lsDgt) div 10 < result[0]:
+        result[1] = true
+        result[0] = uint64.high
+      else:
+        result[0] = result[0] * 10 + lsDgt
+    # Fetch next digit
+    c = eatDigitAndPeek() # implicit auto-return

-proc scanNumber(lexer: var JsonLexer) =
+proc scanNumber(lexer: var JsonLexer)
+    {.gcsafe, raises: [Defect,IOError].} =
   var sign = lexer.scanSign()
   if sign == 0: return
   var c = lexer.stream.peek()
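A note on the overflow arithmetic above: `canAppendDigit9` is the largest
accumulator to which any digit (even 9) can be appended without exceeding
`uint64.high`; only above that fast bound is the exact per-digit limit
consulted. A standalone sketch of the predicate (the `mulAddOverflows` name
is invented for illustration):

  const canAppendDigit9 = (uint64.high - 9) div 10

  proc mulAddOverflows(acc, dgt: uint64): bool =
    ## Would `acc * 10 + dgt` exceed uint64.high?
    canAppendDigit9 < acc and (uint64.high - dgt) div 10 < acc

  when isMainModule:
    # uint64.high = 18446744073709551615 = 1844674407370955161 * 10 + 5
    doAssert not mulAddOverflows(1844674407370955161u64, 5)
    doAssert mulAddOverflows(1844674407370955161u64, 6)
    doAssert not mulAddOverflows(canAppendDigit9, 9)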
@@ -307,18 +381,18 @@ proc scanNumber(lexer: var JsonLexer) =
   if c == '.':
     advance lexer.stream
     requireMoreNumberChars: discard
-    lexer.tok = tkFloat
+    lexer.tokKind = tkFloat
     c = lexer.stream.peek()
   elif c.isDigit:
-    lexer.tok = if sign > 0: tkInt
-                else: tkNegativeInt
-    let scannedValue = lexer.scanInt()
-    checkForNonPortableInt scannedValue
+    lexer.tokKind = if sign > 0: tkInt
+                    else: tkNegativeInt
+    let (scannedValue,overflow) = lexer.scanInt()
+    checkForNonPortableInt scannedValue, overflow
     lexer.absIntVal = scannedValue
     if not lexer.stream.readable: return
     c = lexer.stream.peek()
     if c == '.':
-      lexer.tok = tkFloat
+      lexer.tokKind = tkFloat
       lexer.floatVal = float(lexer.absIntVal) * float(sign)
       c = eatDigitAndPeek()
   else:
@@ -338,7 +412,7 @@ proc scanNumber(lexer: var JsonLexer) =
     if not isDigit lexer.stream.peek():
       error errNumberExpected

-    let exponent = lexer.scanInt()
+    let (exponent,_) = lexer.scanInt()
     if exponent >= uint64(len(powersOfTen)):
       error errExponentTooLarge
@@ -351,47 +425,170 @@ proc scanIdentifier(lexer: var JsonLexer,
                     expectedIdent: string, expectedTok: TokKind) =
   for c in expectedIdent:
     if c != lexer.stream.read():
-      lexer.tok = tkError
+      lexer.tokKind = tkError
       return
-  lexer.tok = expectedTok
+  lexer.tokKind = expectedTok

-proc next*(lexer: var JsonLexer) =
+proc accept*(lexer: var JsonLexer)
+    {.gcsafe, raises: [Defect,IOError].} =
+  ## Finalise token by parsing the value. Note that this might change
+  ## the token type
+  case lexer.tokKind
+  of tkNumeric:
+    lexer.scanNumber
+  of tkQuoted:
+    lexer.scanString
+  else:
+    discard
+
+proc next*(lexer: var JsonLexer)
+    {.gcsafe, raises: [Defect,IOError].} =
   lexer.skipWhitespace()

   if not lexer.stream.readable:
-    lexer.tok = tkEof
+    lexer.tokKind = tkEof
     return

+  # in case the value parsing was missing
+  lexer.accept()
+  lexer.strVal.setLen 0 # release memory (if any)
+
   let c = lexer.stream.peek()
   case c
   of '+', '-', '.', '0'..'9':
-    lexer.scanNumber()
+    lexer.tokKind = tkNumeric
   of '"':
-    lexer.scanString()
+    lexer.tokKind = tkQuoted
   of '[':
     advance lexer.stream
-    lexer.tok = tkBracketLe
+    lexer.tokKind = tkBracketLe
   of '{':
     advance lexer.stream
-    lexer.tok = tkCurlyLe
+    lexer.tokKind = tkCurlyLe
   of ']':
     advance lexer.stream
-    lexer.tok = tkBracketRi
+    lexer.tokKind = tkBracketRi
   of '}':
     advance lexer.stream
-    lexer.tok = tkCurlyRi
+    lexer.tokKind = tkCurlyRi
   of ',':
     advance lexer.stream
-    lexer.tok = tkComma
+    lexer.tokKind = tkComma
   of ':':
     advance lexer.stream
-    lexer.tok = tkColon
+    lexer.tokKind = tkColon
   of '\0':
-    lexer.tok = tkEof
+    lexer.tokKind = tkEof
   of 'n': lexer.scanIdentifier("null", tkNull)
   of 't': lexer.scanIdentifier("true", tkTrue)
   of 'f': lexer.scanIdentifier("false", tkFalse)
   else:
     advance lexer.stream
-    lexer.tok = tkError
+    lexer.tokKind = tkError
+
+proc tok*(lexer: var JsonLexer): TokKind
+    {.gcsafe, raises: [Defect,IOError].} =
+  ## Getter, implies full token parsing
+  lexer.accept
+  lexer.tokKind
+
+proc lazyTok*(lexer: JsonLexer): TokKind =
+  ## Preliminary token state unless already accepted
+  lexer.tokKind
+
+
+proc customIntHandler*(lexer: var JsonLexer; handler: CustomIntHandler)
+    {.gcsafe, raises: [Defect,IOError].} =
+  ## Apply the `handler` argument function for parsing a `tkNumeric` type
+  ## value. This function sets the token state to `tkExInt`, `tkExNegInt`,
+  ## or `tkError`.
+  proc customScan(lexer: var JsonLexer)
+      {.gcsafe, raises: [Defect,IOError].} =
+    var c = lexer.stream.peek()
+    handler(ord(c) - ord('0'))
+    c = eatDigitAndPeek() # implicit auto-return
+    while c.isDigit:
+      handler(ord(c) - ord('0'))
+      c = eatDigitAndPeek() # implicit auto-return
+
+  if lexer.tokKind == tkNumeric:
+    var sign = lexer.scanSign()
+    if sign != 0:
+      if lexer.stream.peek.isDigit:
+        lexer.tokKind = if 0 < sign: tkExInt else: tkExNegInt
+        lexer.customScan
+        if not lexer.stream.readable or lexer.stream.peek != '.':
+          return
+
+  error errCustomIntExpexted
+
+proc customBlobHandler*(lexer: var JsonLexer; handler: CustomBlobHandler)
+    {.gcsafe, raises: [Defect,IOError].} =
+  ## Apply the `handler` argument function for parsing a `tkQuoted` type
+  ## value. This function sets the token state to `tkExBlob`, or `tkError`.
+  proc customScan(lexer: var JsonLexer)
+      {.gcsafe, raises: [Defect,IOError].} =
+    var what = Continue
+    while lexer.stream.readable:
+      var c = lexer.stream.peek
+      handler(c.byte, what)
+      case what
+      of StopBeforeByte:
+        break
+      of StopSwallowByte:
+        advance lexer.stream
+        break
+      of Continue:
+        advance lexer.stream
+
+  if lexer.tokKind == tkQuoted:
+    advance lexer.stream
+    lexer.tokKind = tkExBlob
+    lexer.customScan
+    return
+
+  error errCustomBlobExpexted
+
+
+template customIntValueIt*(lexer: var JsonLexer; body: untyped): untyped =
+  ## Convenience wrapper around `customIntHandler()` for parsing integers.
+  ##
+  ## The `body` argument represents a virtual function body. So the current
+  ## digit processing can be exited with `return`.
+  var handler: CustomIntHandler =
+    proc(digit: int) =
+      let it {.inject.} = digit
+      body
+  lexer.customIntHandler(handler)
+
+template customBlobValueIt*(lexer: var JsonLexer; body: untyped): untyped =
+  ## Convenience wrapper around `customBlobHandler()` for parsing any byte
+  ## object. The body function needs to terminate explicitly with the typical
+  ## phrase `doNext = StopSwallowByte` or with the more unusual phrase
+  ## `doNext = StopBeforeByte`.
+  ##
+  ## The `body` argument represents a virtual function body. So the current
+  ## byte processing can be exited with `return`.
+  var handler: CustomBlobHandler =
+    proc(c: byte; what: var CustomByteAction) =
+      let it {.inject.} = c
+      var doNext {.inject.} = what
+      body
+      what = doNext
+  lexer.customBlobHandler(handler)
+
+template customTextValueIt*(lexer: var JsonLexer; body: untyped): untyped =
+  ## Convenience wrapper around `customBlobHandler()` for parsing a text
+  ## terminating with a double quote character '"' (no inner double quote
+  ## allowed.)
+  ##
+  ## The `body` argument represents a virtual function body. So the current
+  ## character processing can be exited with `return`.
+  var handler: CustomBlobHandler =
+    proc(c: byte; what: var CustomByteAction) =
+      let it {.inject.} = c.chr
+      if it == '"':
+        what = StopSwallowByte
+      else:
+        body
+  lexer.customBlobHandler(handler)
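Usage-wise, the three `...ValueIt` templates above only inject `it` (and, for
the blob variant, `doNext`) into a handler closure; the same scan can be
written against `customBlobHandler` directly. A minimal sketch (the `rawScan`
helper is illustrative, not library code):

  import json_serialization/lexer

  # Collect the bytes of a quoted value up to the closing '"', which is
  # consumed but not stored (StopSwallowByte).
  proc rawScan(lexer: var JsonLexer): string =
    var collected = ""
    var handler: CustomBlobHandler =
      proc(b: byte; what: var CustomByteAction) =
        if b.char == '"':
          what = StopSwallowByte
        else:
          collected.add b.char
    lexer.customBlobHandler(handler)
    collected

`customTextValueIt` generates essentially this handler, with the user-supplied
`body` spliced in where `collected.add` appears.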
@@ -101,7 +101,7 @@ proc raiseUnexpectedToken*(r: JsonReader, expected: ExpectedTokenCategory)
     {.noreturn.} =
   var ex = new UnexpectedTokenError
   ex.assignLineNumber(r)
-  ex.encountedToken = r.lexer.tok
+  ex.encountedToken = r.lexer.lazyTok
   ex.expectedToken = expected
   raise ex
@@ -155,7 +155,7 @@ proc init*(T: type JsonReader,
 proc setParsed[T: enum](e: var T, s: string) =
   e = parseEnum[T](s)

-proc requireToken*(r: JsonReader, tk: TokKind) =
+proc requireToken*(r: var JsonReader, tk: TokKind) =
   if r.lexer.tok != tk:
     r.raiseUnexpectedToken case tk
       of tkString: etString
@@ -262,6 +262,10 @@ proc parseJsonNode(r: var JsonReader): JsonNode =
     result = JsonNode(kind: JNull)
     r.lexer.next()

+  of tkQuoted, tkExBlob, tkNumeric, tkExInt, tkExNegInt:
+    raiseAssert "generic type " & $r.lexer.lazyTok & " is not applicable"
+

 proc skipSingleJsValue(r: var JsonReader) =
   case r.lexer.tok
   of tkCurlyLe:
@@ -292,7 +296,9 @@ proc skipSingleJsValue(r: var JsonReader) =
   of tkColon, tkComma, tkEof, tkError, tkBracketRi, tkCurlyRi:
     r.raiseUnexpectedToken etValue

-  of tkString, tkInt, tkNegativeInt, tkFloat, tkTrue, tkFalse, tkNull:
+  of tkString, tkQuoted, tkExBlob,
+     tkInt, tkNegativeInt, tkFloat, tkNumeric, tkExInt, tkExNegInt,
+     tkTrue, tkFalse, tkNull:
     r.lexer.next()

 proc captureSingleJsValue(r: var JsonReader, output: var string) =
@@ -335,7 +341,9 @@ proc captureSingleJsValue(r: var JsonReader, output: var string) =
   of tkColon, tkComma, tkEof, tkError, tkBracketRi, tkCurlyRi:
     r.raiseUnexpectedToken etValue

-  of tkString, tkInt, tkNegativeInt, tkFloat, tkTrue, tkFalse, tkNull:
+  of tkString, tkQuoted, tkExBlob,
+     tkInt, tkNegativeInt, tkFloat, tkNumeric, tkExInt, tkExNegInt,
+     tkTrue, tkFalse, tkNull:
     r.lexer.next()

 proc allocPtr[T](p: var ptr T) =
@@ -348,7 +356,7 @@ iterator readArray*(r: var JsonReader, ElemType: typedesc): ElemType =
   mixin readValue

   r.skipToken tkBracketLe
-  if r.lexer.tok != tkBracketRi:
+  if r.lexer.lazyTok != tkBracketRi:
     while true:
       var res: ElemType
       readValue(r, res)
@@ -362,14 +370,14 @@ iterator readObjectFields*(r: var JsonReader,
   mixin readValue

   r.skipToken tkCurlyLe
-  if r.lexer.tok != tkCurlyRi:
+  if r.lexer.lazyTok != tkCurlyRi:
     while true:
       var key: KeyType
       readValue(r, key)
-      if r.lexer.tok != tkColon: break
+      if r.lexer.lazyTok != tkColon: break
       r.lexer.next()
       yield key
-      if r.lexer.tok != tkComma: break
+      if r.lexer.lazyTok != tkComma: break
       r.lexer.next()
   r.skipToken tkCurlyRi
@@ -394,10 +402,47 @@ template isCharArray(v: auto): bool = false

 proc readValue*[T](r: var JsonReader, value: var T)
     {.raises: [SerializationError, IOError, Defect].} =
+  ## Master field/object parser. This function relies on customised
+  ## sub-mixins for particular object types.
+  ##
+  ## Customised readValue() examples:
+  ## ::
+  ##   type
+  ##     FancyInt = distinct int
+  ##     FancyUInt = distinct uint
+  ##
+  ##   proc readValue(reader: var JsonReader, value: var FancyInt) =
+  ##     ## Refer to another readValue() instance
+  ##     value = reader.readValue(int).FancyInt
+  ##
+  ##   proc readValue(reader: var JsonReader, value: var FancyUInt) =
+  ##     ## Provide a full custom version of a readValue() instance
+  ##     if reader.lexer.lazyTok == tkNumeric:
+  ##       # lazyTok: check token before the value is available
+  ##       var accu: uint
+  ##       # custom parser (the directive `customIntValueIt()` is a
+  ##       # convenience wrapper around `customIntHandler()`.)
+  ##       reader.lexer.customIntValueIt:
+  ##         accu = accu * 10 + it.uint
+  ##       value = accu.FancyUInt
+  ##     elif reader.lexer.lazyTok == tkQuoted:
+  ##       var accu: string
+  ##       # the following is really for demo only (inefficient,
+  ##       # lacks hex encoding)
+  ##       reader.lexer.customTextValueIt:
+  ##         accu &= it
+  ##       value = accu.parseUInt.FancyUInt
+  ##     ...
+  ##     # prepare next parser cycle
+  ##     reader.lexer.next
+  ##
   mixin readValue
   type ReaderType {.used.} = type r

-  let tok {.used.} = r.lexer.tok
+  when value is (object or tuple):
+    let tok {.used.} = r.lexer.lazyTok
+  else:
+    let tok {.used.} = r.lexer.tok # resolve lazy token

   when value is JsonString:
     r.captureSingleJsValue(string value)
@@ -527,23 +572,32 @@ proc readValue*[T](r: var JsonReader, value: var T)
       when expectedFields > 0:
         let fields = T.fieldReadersTable(ReaderType)
         var expectedFieldPos = 0
-        while r.lexer.tok == tkString:
+        while true:
+          # Have the field name of the AVP (attribute/value pair) parsed
+          if r.lexer.lazyTok == tkQuoted:
+            r.lexer.accept
+          if r.lexer.lazyTok != tkString:
+            break
+          # Calculate/assemble handler
           when T is tuple:
             var reader = fields[][expectedFieldPos].reader
             expectedFieldPos += 1
           else:
             var reader = findFieldReader(fields[], r.lexer.strVal, expectedFieldPos)
-          r.lexer.next()
-          r.skipToken tkColon
           if reader != nil:
+            r.lexer.next()
+            r.skipToken tkColon
             reader(value, r)
             inc readFields
-          elif r.allowUnknownFields:
-            r.skipSingleJsValue()
           else:
-            const typeName = typetraits.name(T)
-            r.raiseUnexpectedField(r.lexer.strVal, typeName)
-          if r.lexer.tok == tkComma:
             r.lexer.next()
+            r.skipToken tkColon
+            if r.allowUnknownFields:
+              r.skipSingleJsValue()
+            else:
+              const typeName = typetraits.name(T)
+              r.raiseUnexpectedField(r.lexer.strVal, typeName)
+          if r.lexer.lazyTok == tkComma:
+            r.lexer.next()
           else:
             break
@@ -552,6 +606,7 @@ proc readValue*[T](r: var JsonReader, value: var T)
         const typeName = typetraits.name(T)
         r.raiseIncompleteObject(typeName)

+      r.lexer.accept
       r.skipToken tkCurlyRi

   else:
@@ -8,7 +8,7 @@ proc writeValue*(writer: var JsonWriter, value: Option) =
     writer.writeValue JsonString("null")

 proc readValue*[T](reader: var JsonReader, value: var Option[T]) =
-  let tok = reader.lexer.tok
+  let tok = reader.lexer.lazyTok
   if tok == tkNull:
     reset value
     reader.lexer.next()
@@ -3,6 +3,7 @@ import
   serialization/object_serialization,
   serialization/testing/generic_suite,
   ../json_serialization, ./utils,
+  ../json_serialization/lexer,
   ../json_serialization/std/[options, sets, tables]

 type
@@ -50,6 +51,80 @@ type
     notNilStr: cstring
     nilStr: cstring

+  # Customised parser tests
+  FancyInt = distinct int
+  FancyUInt = distinct uint
+  FancyText = distinct string
+
+  HasFancyInt = object
+    name: string
+    data: FancyInt
+
+  HasFancyUInt = object
+    name: string
+    data: FancyUInt
+
+  HasFancyText = object
+    name: string
+    data: FancyText
+
+  TokenRegistry = tuple
+    entry, exit: TokKind
+    dup: bool
+
+var
+  customVisit: TokenRegistry
+
+template registerVisit(reader: var JsonReader; body: untyped): untyped =
+  if customVisit.entry == tkError:
+    customVisit.entry = reader.lexer.lazyTok
+    body
+    customVisit.exit = reader.lexer.lazyTok
+  else:
+    customVisit.dup = true
+
+# Customised parser referring to other parser
+proc readValue(reader: var JsonReader, value: var FancyInt) =
+  reader.registerVisit:
+    value = reader.readValue(int).FancyInt
+
+# Customised numeric parser for integer and stringified integer
+proc readValue(reader: var JsonReader, value: var FancyUInt) =
+  reader.registerVisit:
+    var accu = 0u
+    case reader.lexer.lazyTok
+    of tkNumeric:
+      reader.lexer.customIntValueIt:
+        accu = accu * 10u + it.uint
+    of tkQuoted:
+      var s = ""
+      reader.lexer.customTextValueIt:
+        s &= it
+      accu = s.parseUInt
+    else:
+      discard
+    value = accu.FancyUInt
+    reader.lexer.next
+
+# Customised text parser, accepts embedded quote
+proc readValue(reader: var JsonReader, value: var FancyText) =
+  reader.registerVisit:
+    var (s, esc) = ("", false)
+    reader.lexer.customBlobValueIt:
+      let c = it.chr
+      if esc:
+        s &= c
+        esc = false
+      elif c == '\\':
+        esc = true
+      elif c != '"':
+        s &= c
+      else:
+        doNext = StopSwallowByte
+    value = s.FancyText
+    reader.lexer.next
+
+
 # TODO `borrowSerialization` still doesn't work
 # properly when it's placed in another module:
 Meter.borrowSerialization int
@@ -371,3 +446,67 @@ suite "toJson tests":
     # clarity regarding the memory allocation approach
     Json.decode("null", cstring)

+suite "Custom parser tests":
+  test "Fall back to int parser":
+    customVisit = TokenRegistry.default
+
+    let
+      jData = test_dedent"""
+        {
+          "name": "FancyInt",
+          "data": -12345
+        }
+        """
+      dData = Json.decode(jData, HasFancyInt)
+
+    check dData.name == "FancyInt"
+    check dData.data.int == -12345
+    check customVisit == (tkNumeric, tkCurlyRi, false)
+
+  test "Uint parser on negative integer":
+    customVisit = TokenRegistry.default
+
+    let
+      jData = test_dedent"""
+        {
+          "name": "FancyUInt",
+          "data": -12345
+        }
+        """
+      dData = Json.decode(jData, HasFancyUInt)
+
+    check dData.name == "FancyUInt"
+    check dData.data.uint == 12345u # abs value
+    check customVisit == (tkNumeric, tkExNegInt, false)
+
+  test "Uint parser on string integer":
+    customVisit = TokenRegistry.default
+
+    let
+      jData = test_dedent"""
+        {
+          "name": "FancyUInt",
+          "data": "12345"
+        }
+        """
+      dData = Json.decode(jData, HasFancyUInt)
+
+    check dData.name == "FancyUInt"
+    check dData.data.uint == 12345u
+    check customVisit == (tkQuoted, tkExBlob, false)
+
+  test "Parser on text blob with embedded quote (backslash escape support)":
+    customVisit = TokenRegistry.default
+
+    let
+      jData = test_dedent"""
+        {
+          "name": "FancyText",
+          "data": "a\bc\"\\def"
+        }
+        """
+      dData = Json.decode(jData, HasFancyText)
+
+    check dData.name == "FancyText"
+    check dData.data.string == "abc\"\\def"
+    check customVisit == (tkQuoted, tkExBlob, false)