nim-json-serialization/json_serialization/reader.nim

774 lines
23 KiB
Nim
Raw Normal View History

2020-04-22 22:39:56 +00:00
{.experimental: "notnil".}
2018-12-17 23:01:06 +00:00
import
std/[enumutils, tables, macros, strformat, typetraits],
stew/[enums, objects],
faststreams/inputs, serialization/[formats, object_serialization, errors],
2022-02-18 09:26:15 +00:00
"."/[format, types, lexer]
2020-07-24 19:49:30 +00:00
from json import JsonNode, JsonNodeKind
export
enumutils, inputs, format, types, errors
2018-12-17 23:01:06 +00:00
type
  JsonReader*[Flavor = DefaultFlavor] = object
    ## Streaming JSON deserializer, parameterized by a serialization flavor.
    lexer*: JsonLexer
    allowUnknownFields: bool  # tolerate object members absent from the target type
    requireAllFields: bool    # raise IncompleteObjectError on missing fields

  JsonReaderError* = object of JsonError
    ## Base type for all reader errors; carries the source position.
    line*, col*: int

  UnexpectedField* = object of JsonReaderError
    ## A JSON object member has no counterpart in the deserialized type.
    encounteredField*: string
    deserializedType*: cstring

  ExpectedTokenCategory* = enum
    ## Human-readable token categories used in error reporting.
    etValue = "value"
    etBool = "bool literal"
    etInt = "integer"
    etEnumAny = "enum value (int / string)"
    etEnumString = "enum value (string)"
    etNumber = "number"
    etString = "string"
    etComma = "comma"
    etColon = "colon"
    etBracketLe = "array start bracket"
    etBracketRi = "array end bracket"  # BUGFIX: was "bracker" (typo in message)
    etCurrlyLe = "object start bracket"  # (sic: identifier kept — renaming breaks callers)
    etCurrlyRi = "object end bracket"

  GenericJsonReaderError* = object of JsonReaderError
    ## Wraps an arbitrary exception raised while reading a field.
    deserializedField*: string
    innerException*: ref CatchableError

  UnexpectedTokenError* = object of JsonReaderError
    ## The lexer produced a token of the wrong kind.
    encountedToken*: TokKind
    expectedToken*: ExpectedTokenCategory

  UnexpectedValueError* = object of JsonReaderError
    ## A well-formed token carried a semantically invalid value.

  IncompleteObjectError* = object of JsonReaderError
    ## Not all required fields were present (see `requireAllFields`).
    objectType: cstring

  IntOverflowError* = object of JsonReaderError
    ## A numeric literal does not fit the target integer type.
    isNegative: bool
    absIntVal: BiggestUint

Json.setReader JsonReader
2023-06-05 08:23:36 +00:00
{.push gcsafe, raises: [].}
func valueStr(err: ref IntOverflowError): string =
  ## Render the overflowed integer literal, restoring its sign.
  if err.isNegative:
    "-" & $err.absIntVal
  else:
    $err.absIntVal
2020-06-15 22:03:00 +00:00
template tryFmt(expr: untyped): string =
  ## Evaluate a string-producing expression; if it raises a CatchableError
  ## (e.g. a formatting failure), fall back to the exception's message.
  try:
    expr
  except CatchableError as e:
    e.msg
2020-06-15 22:03:00 +00:00
method formatMsg*(err: ref JsonReaderError, filename: string):
    string {.gcsafe, raises: [].} =
  ## Format the error with a `file(line, col)` prefix.
  # BUGFIX: `filename` was ignored and the literal "(unknown)" was emitted.
  tryFmt: fmt"{filename}({err.line}, {err.col}) Error while reading json file: {err.msg}"
method formatMsg*(err: ref UnexpectedField, filename: string):
    string {.gcsafe, raises: [].} =
  ## Format an unknown-field error with a `file(line, col)` prefix.
  # BUGFIX: `filename` was ignored and the literal "(unknown)" was emitted.
  tryFmt: fmt"{filename}({err.line}, {err.col}) Unexpected field '{err.encounteredField}' while deserializing {err.deserializedType}"
method formatMsg*(err: ref UnexpectedTokenError, filename: string):
    string {.gcsafe, raises: [].} =
  ## Format a wrong-token error with a `file(line, col)` prefix.
  # BUGFIX: `filename` was ignored and the literal "(unknown)" was emitted.
  tryFmt: fmt"{filename}({err.line}, {err.col}) Unexpected token '{err.encountedToken}' in place of '{err.expectedToken}'"
method formatMsg*(err: ref GenericJsonReaderError, filename: string):
    string {.gcsafe, raises: [].} =
  ## Format a wrapped deserialization exception with a `file(line, col)` prefix.
  # BUGFIX: `filename` was ignored and the literal "(unknown)" was emitted.
  tryFmt: fmt"{filename}({err.line}, {err.col}) Exception encountered while deserializing '{err.deserializedField}': [{err.innerException.name}] {err.innerException.msg}"
2018-12-17 23:01:06 +00:00
method formatMsg*(err: ref IntOverflowError, filename: string):
    string {.gcsafe, raises: [].} =
  ## Format an integer-overflow error with a `file(line, col)` prefix.
  # BUGFIX: `filename` was ignored and the literal "(unknown)" was emitted.
  tryFmt: fmt"{filename}({err.line}, {err.col}) The value '{err.valueStr}' is outside of the allowed range"
method formatMsg*(err: ref UnexpectedValueError, filename: string):
    string {.gcsafe, raises: [].} =
  ## Format an invalid-value error with a `file(line, col)` prefix.
  # BUGFIX: `filename` was ignored and the literal "(unknown)" was emitted.
  tryFmt: fmt"{filename}({err.line}, {err.col}) {err.msg}"
method formatMsg*(err: ref IncompleteObjectError, filename: string):
    string {.gcsafe, raises: [].} =
  ## Format a missing-required-fields error with a `file(line, col)` prefix.
  # BUGFIX: `filename` was ignored and the literal "(unknown)" was emitted.
  tryFmt: fmt"{filename}({err.line}, {err.col}) Not all required fields were specified when reading '{err.objectType}'"
func assignLineNumber*(ex: ref JsonReaderError, lexer: JsonLexer) =
  ## Copy the lexer's current token position into the error object.
  ex.col = lexer.tokenStartCol
  ex.line = lexer.line
2018-12-17 23:01:06 +00:00
func raiseUnexpectedToken*(lexer: JsonLexer, expected: ExpectedTokenCategory)
    {.noreturn, raises: [JsonReaderError].} =
  ## Raise an UnexpectedTokenError recording what was found vs. expected.
  let ex = (ref UnexpectedTokenError)(
    encountedToken: lexer.lazyTok,
    expectedToken: expected)
  ex.assignLineNumber(lexer)
  raise ex

template raiseUnexpectedToken*(reader: JsonReader, expected: ExpectedTokenCategory) =
  ## Convenience wrapper operating on the reader's lexer.
  raiseUnexpectedToken(reader.lexer, expected)
func raiseUnexpectedValue*(
    lexer: JsonLexer, msg: string) {.noreturn, raises: [JsonReaderError].} =
  ## Raise an UnexpectedValueError with the given message and the
  ## lexer's current position.
  let ex = (ref UnexpectedValueError)(msg: msg)
  ex.assignLineNumber(lexer)
  raise ex

template raiseUnexpectedValue*(r: JsonReader, msg: string) =
  ## Convenience wrapper operating on the reader's lexer.
  raiseUnexpectedValue(r.lexer, msg)
func raiseIntOverflow*(
    lexer: JsonLexer, absIntVal: BiggestUint, isNegative: bool)
    {.noreturn, raises: [JsonReaderError].} =
  ## Raise an IntOverflowError for a literal whose magnitude/sign combination
  ## does not fit the target integer type.
  let ex = (ref IntOverflowError)(
    absIntVal: absIntVal,
    isNegative: isNegative)
  ex.assignLineNumber(lexer)
  raise ex

template raiseIntOverflow*(r: JsonReader, absIntVal: BiggestUint, isNegative: bool) =
  ## Convenience wrapper operating on the reader's lexer.
  raiseIntOverflow(r.lexer, absIntVal, isNegative)
func raiseUnexpectedField*(
    lexer: JsonLexer, fieldName: string, deserializedType: cstring)
    {.noreturn, raises: [JsonReaderError].} =
  ## Raise an UnexpectedField error for a JSON member that has no
  ## counterpart in the type being deserialized.
  let ex = (ref UnexpectedField)(
    encounteredField: fieldName,
    deserializedType: deserializedType)
  ex.assignLineNumber(lexer)
  raise ex

template raiseUnexpectedField*(r: JsonReader, fieldName: string, deserializedType: cstring) =
  ## Convenience wrapper operating on the reader's lexer.
  raiseUnexpectedField(r.lexer, fieldName, deserializedType)
func raiseIncompleteObject*(
    lexer: JsonLexer, objectType: cstring)
    {.noreturn, raises: [JsonReaderError].} =
  ## Raise an IncompleteObjectError when required fields are missing.
  let ex = (ref IncompleteObjectError)(objectType: objectType)
  ex.assignLineNumber(lexer)
  raise ex

template raiseIncompleteObject*(r: JsonReader, objectType: cstring) =
  ## Convenience wrapper operating on the reader's lexer.
  raiseIncompleteObject(r.lexer, objectType)
func handleReadException*(lexer: JsonLexer,
                          Record: type,
                          fieldName: string,
                          field: auto,
                          err: ref CatchableError) {.raises: [JsonReaderError].} =
  ## Wrap an exception raised while reading `fieldName` into a
  ## GenericJsonReaderError carrying the original error and position.
  ## `Record` and `field` only drive generic instantiation here.
  let ex = (ref GenericJsonReaderError)(
    deserializedField: fieldName,
    innerException: err)
  ex.assignLineNumber(lexer)
  raise ex

template handleReadException*(r: JsonReader,
                              Record: type,
                              fieldName: string,
                              field: auto,
                              err: ref CatchableError) =
  ## Convenience wrapper operating on the reader's lexer.
  handleReadException(r.lexer, Record, fieldName, field, err)
proc init*(T: type JsonReader,
           stream: InputStream,
           mode = defaultJsonMode,
           allowUnknownFields = false,
           requireAllFields = false): T {.raises: [IOError].} =
  ## Construct a reader over `stream` and advance to the first token.
  ## The explicit flags are OR-ed with the flavor's compile-time defaults.
  mixin flavorAllowsUnknownFields, flavorRequiresAllFields
  type Flavor = T.Flavor
  result = T(
    allowUnknownFields: allowUnknownFields or flavorAllowsUnknownFields(Flavor),
    requireAllFields: requireAllFields or flavorRequiresAllFields(Flavor),
    lexer: JsonLexer.init(stream, mode))
  result.lexer.next()
proc requireToken*(lexer: var JsonLexer, tk: TokKind) {.raises: [IOError, JsonReaderError].} =
  ## Raise UnexpectedTokenError unless the current token is `tk`.
  if lexer.tok == tk:
    return
  let expected =
    case tk
    of tkString: etString
    of tkInt, tkNegativeInt: etInt
    of tkComma: etComma
    of tkBracketRi: etBracketRi
    of tkBracketLe: etBracketLe
    of tkCurlyRi: etCurrlyRi
    of tkCurlyLe: etCurrlyLe
    of tkColon: etColon
    else: (doAssert false; etBool)  # no category exists for other kinds
  lexer.raiseUnexpectedToken expected
proc skipToken*(lexer: var JsonLexer, tk: TokKind) {.raises: [IOError, JsonReaderError].} =
  ## Consume the current token, which must be of kind `tk`.
  requireToken(lexer, tk)
  next(lexer)
2018-12-17 23:01:06 +00:00
2020-07-24 19:49:30 +00:00
# Forward declaration: parseJsonNode and readJsonNodeField are mutually
# recursive (objects contain values which may be objects).
proc parseJsonNode(r: var JsonReader): JsonNode
  {.gcsafe, raises: [IOError, JsonReaderError].}
2020-07-24 19:49:30 +00:00
2023-04-17 01:06:09 +00:00
proc readJsonNodeField(r: var JsonReader, field: var JsonNode)
    {.gcsafe, raises: [IOError, JsonReaderError].} =
  ## Parse the `: value` part of an object member into `field`.
  ## A non-nil `field` means the key was already seen — duplicates are rejected.
  if not field.isNil:
    r.raiseUnexpectedValue("Unexpected duplicated field name")
  r.lexer.next()
  skipToken(r.lexer, tkColon)
  field = r.parseJsonNode()
proc parseJsonNode(r: var JsonReader): JsonNode =
  ## Parse one complete JSON value into a std `JsonNode`.
  # abs(BiggestInt.low) == BiggestInt.high + 1 — the largest magnitude a
  # *negative* value may carry; positive values top out one lower.
  const maxIntValue: BiggestUint = BiggestInt.high.BiggestUint + 1

  case r.lexer.tok
  of tkCurlyLe:
    result = JsonNode(kind: JObject)
    r.lexer.next()
    if r.lexer.tok != tkCurlyRi:
      while r.lexer.tok == tkString:
        try:
          # mgetOrPut yields a `var` slot: nil for new keys, non-nil for
          # duplicates, which readJsonNodeField rejects.
          r.readJsonNodeField(result.fields.mgetOrPut(r.lexer.strVal, nil))
        except KeyError:
          raiseAssert "mgetOrPut should never raise a KeyError"
        if r.lexer.tok == tkComma:
          r.lexer.next()
        else:
          break
    r.lexer.skipToken tkCurlyRi

  of tkBracketLe:
    result = JsonNode(kind: JArray)
    r.lexer.next()
    if r.lexer.tok != tkBracketRi:
      while true:
        result.elems.add r.parseJsonNode()
        if r.lexer.tok == tkBracketRi:
          break
        else:
          r.lexer.skipToken tkComma
    # Skip over the last tkBracketRi
    r.lexer.next()

  of tkColon, tkComma, tkEof, tkError, tkBracketRi, tkCurlyRi:
    r.raiseUnexpectedToken etValue

  of tkString:
    result = JsonNode(kind: JString, str: r.lexer.strVal)
    r.lexer.next()

  of tkInt:
    # BUGFIX: positive values must not exceed BiggestInt.high (maxIntValue - 1).
    # The previous `> maxIntValue` test let `high + 1` through, raising a
    # RangeDefect in the conversion below instead of a proper JSON error.
    if r.lexer.absIntVal >= maxIntValue:
      r.raiseIntOverflow(r.lexer.absIntVal, false)
    else:
      result = JsonNode(kind: JInt, num: BiggestInt r.lexer.absIntVal)
      r.lexer.next()

  of tkNegativeInt:
    # BUGFIX: a negative value's magnitude may be at most maxIntValue
    # (== abs(BiggestInt.low)). The previous `> maxIntValue + 1` test let
    # `abs(low) + 1` through, silently producing a wrong positive number.
    if r.lexer.absIntVal > maxIntValue:
      r.raiseIntOverflow(r.lexer.absIntVal, true)
    else:
      # `0 - x` on the unsigned magnitude yields the negative two's-complement
      # counterpart; the cast reinterprets it as signed.
      result = JsonNode(kind: JInt, num: cast[BiggestInt](BiggestUint(0) - r.lexer.absIntVal))
      r.lexer.next()

  of tkFloat:
    result = JsonNode(kind: JFloat, fnum: r.lexer.floatVal)
    r.lexer.next()

  of tkTrue:
    result = JsonNode(kind: JBool, bval: true)
    r.lexer.next()

  of tkFalse:
    result = JsonNode(kind: JBool, bval: false)
    r.lexer.next()

  of tkNull:
    result = JsonNode(kind: JNull)
    r.lexer.next()

  of tkQuoted, tkExBlob, tkNumeric, tkExInt, tkExNegInt:
    # Lazy/custom token kinds must be resolved by the lexer before reaching
    # the generic JsonNode path.
    raiseAssert "generic type " & $r.lexer.lazyTok & " is not applicable"
proc skipSingleJsValue*(lexer: var JsonLexer) {.raises: [IOError, JsonReaderError].} =
  ## Skip exactly one JSON value (scalar, object or array) without
  ## materializing it. On entry the lexer must sit on the value's first
  ## token; on exit it sits on the first token after the value.
  case lexer.tok
  of tkCurlyLe:
    lexer.next()
    if lexer.tok != tkCurlyRi:
      while true:
        # Each member is `"key" : value`; recurse for the value.
        lexer.skipToken tkString
        lexer.skipToken tkColon
        lexer.skipSingleJsValue()
        if lexer.tok == tkCurlyRi:
          break
        lexer.skipToken tkComma
    # Skip over the last tkCurlyRi
    lexer.next()
  of tkBracketLe:
    lexer.next()
    if lexer.tok != tkBracketRi:
      while true:
        lexer.skipSingleJsValue()
        if lexer.tok == tkBracketRi:
          break
        else:
          lexer.skipToken tkComma
    # Skip over the last tkBracketRi
    lexer.next()
  of tkColon, tkComma, tkEof, tkError, tkBracketRi, tkCurlyRi:
    # These token kinds can never start a value.
    lexer.raiseUnexpectedToken etValue
  of tkString, tkQuoted, tkExBlob,
     tkInt, tkNegativeInt, tkFloat, tkNumeric, tkExInt, tkExNegInt,
     tkTrue, tkFalse, tkNull:
    # Single-token values (incl. lazy/custom kinds): just consume them.
    lexer.next()
2023-06-05 08:23:36 +00:00
proc captureSingleJsValue(r: var JsonReader, output: var string) {.raises: [IOError, SerializationError].} =
  ## Append the raw textual form of exactly one JSON value to `output`,
  ## re-rendering each token as it is consumed. The renderTok/skipToken
  ## interleaving is order-critical: every token must be rendered before
  ## the lexer advances past it.
  r.lexer.renderTok output
  case r.lexer.tok
  of tkCurlyLe:
    r.lexer.next()
    if r.lexer.tok != tkCurlyRi:
      while true:
        r.lexer.renderTok output
        r.lexer.skipToken tkString
        r.lexer.renderTok output
        r.lexer.skipToken tkColon
        r.captureSingleJsValue(output)
        r.lexer.renderTok output
        if r.lexer.tok == tkCurlyRi:
          break
        else:
          r.lexer.skipToken tkComma
    else:
      # Empty object: the loop above never rendered the closing brace.
      output.add '}'
    # Skip over the last tkCurlyRi
    r.lexer.next()
  of tkBracketLe:
    r.lexer.next()
    if r.lexer.tok != tkBracketRi:
      while true:
        r.captureSingleJsValue(output)
        r.lexer.renderTok output
        if r.lexer.tok == tkBracketRi:
          break
        else:
          r.lexer.skipToken tkComma
    else:
      # Empty array: render the closing bracket by hand.
      output.add ']'
    # Skip over the last tkBracketRi
    r.lexer.next()
  of tkColon, tkComma, tkEof, tkError, tkBracketRi, tkCurlyRi:
    # These token kinds can never start a value.
    r.raiseUnexpectedToken etValue
  of tkString, tkQuoted, tkExBlob,
     tkInt, tkNegativeInt, tkFloat, tkNumeric, tkExInt, tkExNegInt,
     tkTrue, tkFalse, tkNull:
    # Single-token values: already rendered above, just consume.
    r.lexer.next()
# Uniform allocation helpers so generic code can materialize either a raw
# pointer target (caller must free with `dealloc`) or a GC-managed ref.
func allocPtr[T](p: var ptr T) =
  p = create(T)

func allocPtr[T](p: var ref T) =
  p = new(T)
2023-06-05 08:23:36 +00:00
iterator readArray*(r: var JsonReader, ElemType: typedesc): ElemType {.raises: [IOError, SerializationError].} =
  ## Yield each element of the current JSON array, deserialized as `ElemType`.
  ## Consumes the surrounding brackets.
  mixin readValue

  r.lexer.skipToken tkBracketLe
  if r.lexer.lazyTok != tkBracketRi:
    while true:
      var res: ElemType
      readValue(r, res)
      yield res
      # No comma means the array must be ending; the skipToken below
      # reports an error for anything other than `]`.
      if r.lexer.tok != tkComma: break
      r.lexer.next()
  r.lexer.skipToken tkBracketRi
2019-07-16 10:20:05 +00:00
iterator readObjectFields*(r: var JsonReader,
                           KeyType: type): KeyType {.raises: [IOError, SerializationError].} =
  ## Iterate over the keys of the current JSON object. After each yielded
  ## key the lexer sits on the member's value, which the caller must consume.
  mixin readValue

  r.lexer.skipToken tkCurlyLe
  if r.lexer.lazyTok != tkCurlyRi:
    while true:
      var key: KeyType
      readValue(r, key)
      # NOTE(review): a missing colon silently ends the loop here; the
      # closing-brace check below then reports the error — confirm intended.
      if r.lexer.lazyTok != tkColon: break
      r.lexer.next()
      yield key
      if r.lexer.lazyTok != tkComma: break
      r.lexer.next()
  r.lexer.skipToken tkCurlyRi
iterator readObject*(r: var JsonReader,
                     KeyType: type,
                     ValueType: type): (KeyType, ValueType) {.raises: [IOError, SerializationError].} =
  ## Yield `(key, value)` pairs for every member of the current JSON object.
  mixin readValue
  for key in r.readObjectFields(KeyType):
    var val: ValueType
    r.readValue(val)
    yield (key, val)
# Compile-time probes: overload resolution succeeds only when the
# (experimental) `not nil` annotation applies to the given ref/ptr type.
func isNotNilCheck[T](x: ref T not nil) {.compileTime.} = discard
func isNotNilCheck[T](x: ptr T not nil) {.compileTime.} = discard
2020-04-22 22:39:56 +00:00
func isFieldExpected*(T: type): bool {.compileTime.} =
  ## Option[...] fields may be absent from the input; all others are required.
  not (T is Option)
func totalExpectedFields*(T: type): int {.compileTime.} =
  ## Count the serialized fields of `T` that must be present in the input
  ## (i.e. those for which isFieldExpected holds).
  mixin isFieldExpected,
        enumAllSerializedFields

  enumAllSerializedFields(T):
    if isFieldExpected(FieldType):
      inc result
func setBitInWord(x: var uint, bit: int) {.inline.} =
  ## Set bit number `bit` (LSB = 0) in `x`.
  x = x or (uint(1) shl bit)

const bitsPerWord = sizeof(uint) * 8
func expectedFieldsBitmask*(TT: type): auto {.compileTime.} =
  ## Build a compile-time bitmask (as an array of words) with one bit per
  ## serialized field of `TT`, set when that field is required in the input.
  ## Field order matches enumAllSerializedFields.
  type T = TT

  mixin isFieldExpected,
        enumAllSerializedFields

  # Round up so every field has a bit.
  const requiredWords =
    (totalSerializedFields(T) + bitsPerWord - 1) div bitsPerWord

  var res: array[requiredWords, uint]

  var i = 0
  enumAllSerializedFields(T):
    if isFieldExpected(FieldType):
      res[i div bitsPerWord].setBitInWord(i mod bitsPerWord)
    inc i

  res
template setBitInArray[N](data: var array[N, uint], bitIdx: int) =
  ## Set bit `bitIdx` in a multi-word bitmask; the single-word case skips
  ## the div/mod arithmetic entirely (resolved at compile time).
  when data.len > 1:
    setBitInWord(data[bitIdx div bitsPerWord], bitIdx mod bitsPerWord)
  else:
    setBitInWord(data[0], bitIdx)
func isBitwiseSubsetOf[N](lhs, rhs: array[N, uint]): bool =
  ## True when every bit set in `lhs` is also set in `rhs`
  ## (word-wise `lhs ⊆ rhs`).
  for idx in low(lhs) .. high(lhs):
    # Any bit of lhs not covered by rhs disproves the subset relation.
    if (lhs[idx] and not rhs[idx]) != 0:
      return false
  true
2020-04-29 08:21:20 +00:00
# this construct catches `array[N, char]` which otherwise won't decompose into
# openArray[char] - we treat any array-like thing-of-characters as a string in
# the output
template isCharArray[N](v: array[N, char]): bool = true
template isCharArray(v: auto): bool = false
func parseStringEnum[T](
    r: var JsonReader, value: var T,
    stringNormalizer: static[proc(s: string): string]) {.raises: [JsonReaderError].} =
  ## Map the lexer's current string token onto enum `T`, comparing names
  ## after applying `stringNormalizer` to both sides (compile-time case stmt).
  try:
    # genEnumCaseStmt raises ValueError when no enum member matches.
    value = genEnumCaseStmt(
      T, r.lexer.strVal,
      default = nil, ord(T.low), ord(T.high), stringNormalizer)
  except ValueError:
    const typeName = typetraits.name(T)
    r.raiseUnexpectedValue("Invalid value for '" & typeName & "'")
func strictNormalize(s: string): string =
  ## Identity normalizer: an enum name must match the JSON string exactly.
  result = s
proc parseEnum[T](
    r: var JsonReader, value: var T, allowNumericRepr: static[bool] = false,
    stringNormalizer: static[proc(s: string): string] = strictNormalize) {.raises: [IOError, JsonReaderError].} =
  ## Read an enum value. String tokens are matched by (normalized) name;
  ## integer tokens are accepted only when `allowNumericRepr` is set and
  ## the enum uses numeric serialization style.
  const style = T.enumStyle
  let tok = r.lexer.tok
  case tok
  of tkString:
    r.parseStringEnum(value, stringNormalizer)
  of tkInt:
    when allowNumericRepr:
      case style
      of EnumStyle.Numeric:
        # checkedEnumAssign rejects ordinals with no corresponding member.
        # NOTE(review): tkNegativeInt is not handled here, so enums with
        # negative ordinals cannot be read numerically — confirm intended.
        if not value.checkedEnumAssign(r.lexer.absIntVal):
          const typeName = typetraits.name(T)
          r.raiseUnexpectedValue("Out of range for '" & typeName & "'")
      of EnumStyle.AssociatedStrings:
        r.raiseUnexpectedToken etEnumString
    else:
      r.raiseUnexpectedToken etEnumString
  else:
    # Any other token kind: report what representation would have been valid.
    case style
    of EnumStyle.Numeric:
      when allowNumericRepr:
        r.raiseUnexpectedToken etEnumAny
      else:
        r.raiseUnexpectedToken etEnumString
    of EnumStyle.AssociatedStrings:
      r.raiseUnexpectedToken etEnumString
proc readRecordValue*[T](r: var JsonReader, value: var T)
                        {.raises: [SerializationError, IOError].} =
  ## Deserialize a JSON object into record/tuple `value` using the
  ## compile-time field-readers table. Unknown members are skipped or
  ## rejected per `allowUnknownFields`; missing required fields are
  ## detected via a bitmask when `requireAllFields` is set.
  type
    ReaderType {.used.} = type r
    T = type value

  r.lexer.skipToken tkCurlyLe

  when T.totalSerializedFields > 0:
    let
      fieldsTable = T.fieldReadersTable(ReaderType)

    const
      expectedFields = T.expectedFieldsBitmask

    var
      encounteredFields: typeof(expectedFields)
      mostLikelyNextField = 0  # hint: fields usually arrive in declared order

    while true:
      # Have the assignment parsed of the AVP
      if r.lexer.lazyTok == tkQuoted:
        r.lexer.accept

      if r.lexer.lazyTok != tkString:
        break

      when T is tuple:
        # Tuples have no names: members are read positionally.
        let fieldIdx = mostLikelyNextField
        mostLikelyNextField += 1
      else:
        let fieldIdx = findFieldIdx(fieldsTable[],
                                    r.lexer.strVal,
                                    mostLikelyNextField)
      if fieldIdx != -1:
        let reader = fieldsTable[][fieldIdx].reader
        r.lexer.next()
        r.lexer.skipToken tkColon
        reader(value, r)
        encounteredFields.setBitInArray(fieldIdx)
      elif r.allowUnknownFields:
        # Consume and discard the unknown member's value.
        r.lexer.next()
        r.lexer.skipToken tkColon
        r.lexer.skipSingleJsValue()
      else:
        const typeName = typetraits.name(T)
        r.raiseUnexpectedField(r.lexer.strVal, cstring typeName)

      if r.lexer.lazyTok == tkComma:
        r.lexer.next()
      else:
        break

    if r.requireAllFields and
       not expectedFields.isBitwiseSubsetOf(encounteredFields):
      const typeName = typetraits.name(T)
      r.raiseIncompleteObject(typeName)

  # Resolve any pending lazy token before insisting on the closing brace.
  r.lexer.accept
  r.lexer.skipToken tkCurlyRi
proc readValue*[T](r: var JsonReader, value: var T)
                  {.gcsafe, raises: [SerializationError, IOError].} =
  ## Master field/object parser. This function relies on customised sub-mixins
  ## for particular object types.
  ##
  ## Dispatches at compile time (`when`) on the type of `value`: strings,
  ## char containers, bools, refs/ptrs, enums, integers, floats, seqs,
  ## arrays, and finally objects/tuples via `readRecordValue`.
  ##
  ## Customised readValue() examples:
  ## ::
  ##   type
  ##     FancyInt = distinct int
  ##     FancyUInt = distinct uint
  ##
  ##   proc readValue(reader: var JsonReader, value: var FancyInt) =
  ##     ## Refer to another readValue() instance
  ##     value = reader.readValue(int).FancyInt
  ##
  ##   proc readValue(reader: var JsonReader, value: var FancyUInt) =
  ##     ## Provide a full custom version of a readValue() instance
  ##     if reader.lexer.lazyTok == tkNumeric:
  ##       # lazyTok: Check token before the value is available
  ##       var accu: FancyUInt
  ##       # custom parser (the directive `customIntValueIt()` is a
  ##       # convenience wrapper around `customIntHandler()`.)
  ##       reader.lexer.customIntValueIt:
  ##         accu = accu * 10 + it.u256
  ##       value = accu
  ##     elif reader.lexer.lazyTok == tkQuoted:
  ##       var accu: string
  ##       # The following is really for demo only (inefficient,
  ##       # lacks hex encoding)
  ##       reader.lexer.customTextValueIt:
  ##         accu &= it
  ##       value = accu.parseUInt.FancyUInt
  ##     ...
  ##     # prepare next parser cycle
  ##     reader.lexer.next
  mixin readValue

  # Objects/tuples drive the lexer themselves (see readRecordValue), so the
  # token stays lazy for them; for all other types resolve it now.
  when value is (object or tuple):
    let tok {.used.} = r.lexer.lazyTok
  else:
    let tok {.used.} = r.lexer.tok # resolve lazy token

  when value is JsonString:
    # Capture the raw JSON text of the next value verbatim.
    r.captureSingleJsValue(string value)

  elif value is JsonNode:
    value = r.parseJsonNode()

  elif value is string:
    r.lexer.requireToken tkString
    value = r.lexer.strVal
    r.lexer.next()

  elif value is seq[char]:
    # A JSON string copied character-by-character into a char seq.
    r.lexer.requireToken tkString
    value.setLen(r.lexer.strVal.len)
    for i in 0..<r.lexer.strVal.len:
      value[i] = r.lexer.strVal[i]
    r.lexer.next()

  elif isCharArray(value):
    # Fixed-size char arrays require an exact length match.
    r.lexer.requireToken tkString
    if r.lexer.strVal.len != value.len:
      # Raise tkString because we expected a `"` earlier
      r.raiseUnexpectedToken(etString)
    for i in 0..<value.len:
      value[i] = r.lexer.strVal[i]
    r.lexer.next()

  elif value is bool:
    case tok
    of tkTrue: value = true
    of tkFalse: value = false
    else: r.raiseUnexpectedToken etBool
    r.lexer.next()

  elif value is ref|ptr:
    when compiles(isNotNilCheck(value)):
      # `not nil` types can never be null, so parse the payload directly.
      allocPtr value
      value[] = readValue(r, type(value[]))
    else:
      if tok == tkNull:
        value = nil
        r.lexer.next()
      else:
        allocPtr value
        value[] = readValue(r, type(value[]))

  elif value is enum:
    r.parseEnum(value)
    r.lexer.next()

  elif value is SomeSignedInt:
    type TargetType = type(value)
    let
      isNegative = tok == tkNegativeInt
      # The lexer stores the magnitude; negative values may reach one past
      # TargetType.high (e.g. -128 for int8).
      maxValidAbsValue: BiggestUint =
        if isNegative:
          TargetType.high.BiggestUint + 1
        else:
          TargetType.high.BiggestUint

    if r.lexer.absIntVal > maxValidAbsValue:
      r.raiseIntOverflow(r.lexer.absIntVal, isNegative)

    case tok
    of tkInt:
      value = TargetType(r.lexer.absIntVal)
    of tkNegativeInt:
      if r.lexer.absIntVal == maxValidAbsValue:
        # We must handle this as a special case because it would be illegal
        # to convert a value like 128 to int8 before negating it. The max
        # int8 value is 127 (while the minimum is -128).
        value = low(TargetType)
      else:
        value = -TargetType(r.lexer.absIntVal)
    else:
      r.raiseUnexpectedToken etInt

    r.lexer.next()

  elif value is SomeUnsignedInt:
    type TargetType = type(value)
    if r.lexer.absIntVal > TargetType.high.BiggestUint:
      r.raiseIntOverflow(r.lexer.absIntVal, isNegative = false)

    case tok
    of tkInt:
      value = TargetType(r.lexer.absIntVal)
    else:
      # tkNegativeInt deliberately rejected for unsigned targets.
      r.raiseUnexpectedToken etInt

    r.lexer.next()

  elif value is SomeFloat:
    case tok
    of tkInt: value = float(r.lexer.absIntVal)
    of tkFloat: value = r.lexer.floatVal
    else:
      r.raiseUnexpectedToken etNumber
    r.lexer.next()

  elif value is seq:
    r.lexer.skipToken tkBracketLe
    if r.lexer.tok != tkBracketRi:
      while true:
        # Grow by one and parse directly into the new slot.
        let lastPos = value.len
        value.setLen(lastPos + 1)
        readValue(r, value[lastPos])
        if r.lexer.tok != tkComma: break
        r.lexer.next()
    r.lexer.skipToken tkBracketRi

  elif value is array:
    r.lexer.skipToken tkBracketLe
    for i in low(value) ..< high(value):
      # TODO: don't ask. this makes the code compile
      if false: value[i] = value[i]
      readValue(r, value[i])
      r.lexer.skipToken tkComma
    # Last element has no trailing comma.
    readValue(r, value[high(value)])
    r.lexer.skipToken tkBracketRi

  elif value is (object or tuple):
    mixin flavorUsesAutomaticObjectSerialization
    type Flavor = JsonReader.Flavor
    const isAutomatic =
      flavorUsesAutomaticObjectSerialization(Flavor)
    when not isAutomatic:
      const typeName = typetraits.name(T)
      {.error: "Please override readValue for the " & typeName & " type (or import the module where the override is provided)".}
    readRecordValue(r, value)

  else:
    const typeName = typetraits.name(T)
    {.error: "Failed to convert from JSON an unsupported type: " & typeName.}
iterator readObjectFields*(r: var JsonReader): string {.
    raises: [IOError, SerializationError].} =
  ## Convenience overload that yields each field name of the current JSON
  ## object as a `string`, delegating to the typed `readObjectFields` iterator.
  for fieldName in r.readObjectFields(string):
    yield fieldName
template configureJsonDeserialization*(
    T: type[enum], allowNumericRepr: static[bool] = false,
    stringNormalizer: static[proc(s: string): string] = strictNormalize) =
  ## Instantiates a `readValue` overload for the enum type `T`.
  ##
  ## `allowNumericRepr` additionally accepts integer tokens (only meaningful
  ## for numerically-styled enums — enforced by the `doAssert` below);
  ## `stringNormalizer` preprocesses incoming string values before they are
  ## matched against the enum's string representations.
  proc readValue*(r: var JsonReader, value: var T) {.
      raises: [IOError, SerializationError].} =
    # Numeric representation only makes sense for enums serialized as numbers.
    static: doAssert not allowNumericRepr or enumStyle(T) == EnumStyle.Numeric
    r.parseEnum(value, allowNumericRepr, stringNormalizer)
{.pop.}