nim-json-serialization/json_serialization/lexer.nim

876 lines
24 KiB
Nim
Raw Normal View History

2023-12-13 09:07:57 +00:00
# json-serialization
# Copyright (c) 2019-2023 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
# * MIT license ([LICENSE-MIT](LICENSE-MIT))
# at your option.
# This file may not be copied, modified, or distributed except according to
# those terms.
2018-12-17 23:01:06 +00:00
import
std/[json, unicode],
2020-05-05 17:28:44 +00:00
faststreams/inputs,
types
2018-12-17 23:01:06 +00:00
export
2022-02-18 09:26:15 +00:00
inputs, types
2018-12-17 23:01:06 +00:00
type
  JsonErrorKind* = enum ## Reasons the lexer can record in `JsonLexer.err`.
    errNone = "no error"
    errHexCharExpected = "hex char expected (part of escape sequence)"
    errStringExpected = "string expected"
    errColonExpected = "':' expected"
    errCommaExpected = "',' expected"
    errBracketRiExpected = "']' expected"
    errCurlyRiExpected = "'}' expected"
    errBracketLeExpected = "'[' expected"
    errCurlyLeExpected = "'{' expected"
    errQuoteExpected = "'\"' or \"'\" expected"
    errNumberExpected = "number expected"
    errExponentTooLarge = "exponent too large"
    errUnexpectedEof = "unexpected end of file"
    errCommentExpected = "comment expected"
    errBoolExpected = "boolean value expected"
    errNullExpected = "null value expected"
    errCommentNotAllowed = "comment not allowed, please set 'allowComments' flag"
    errTrailingComma = "trailing comma not allowed, please set 'trailingComma' flag"
    errOrphanSurrogate = "unicode surrogates must be followed by another unicode character"
    errNonPortableInt = "number is outside the range of portable values"
    errCustomIntExpected = "not a customised integer"
    errCustomBlobExpected = "not a customised quoted blob"
    errLeadingZero = "leading zero is not allowed in integer"
    errU64Overflow = "uint64 overflow detected"
    errIntDigitLimit = "max number of integer digits reached"
    errFracDigitLimit = "max number of fraction digits reached"
    errExpDigitLimit = "max number of exponent digits reached"
    errInvalidBool = "invalid boolean value"
    errInvalidNull = "invalid null value"
    errStringLengthLimit = "max number of string chars reached, please set `stringLengthLimit` to overrride"
    errEscapeHex = "please set `escapeHex` flag to allow \\xHH escape mode"
    errRelaxedEscape = "unsupported escape char, set `relaxedEscape` flag to override"
    errLeadingFraction = "fraction number must be preceded by number, set `leadingFraction` to override"
    errUnknownChar = "unknown character"
    errNestedDepthLimit = "max depth of nested structure reached, please set `nestedDepthLimit` to override"
    errArrayElementsLimit = "max number of array elements reached, please set `arrayElementsLimit` to override"
    errObjectMembersLimit = "max number of object members reached, please set `objectMembersLimit` to override"
    errMissingFirstElement = "first array/table element missing"
    errEmptyFraction = "fraction number should have at least one fractional digit"
    errIntPosSign = "integer with positive sign is not allowed, please set `integerPositiveSign` to override"
    errValueExpected = "json value expected, got comma"
    errEscapeControlChar = "control character x00-x1F must be escaped"
    errInvalidInt = "invalid integer value"

  JsonLexer* = object ## Scanner state kept while reading JSON from `stream`.
    stream*: InputStream    ## input the lexer peeks at and consumes
    err*: JsonErrorKind     ## last recorded failure; `errNone` while healthy
    flags*: JsonReaderFlags ## optional syntax switches consulted while scanning
    conf*: JsonReaderConf   ## numeric limits (digits, string length, nesting, ...)
    line*: int              ## current line; `init` starts it at 1
    lineStartPos: int       ## stream position recorded at the last line break
    tokenStart: int         ## stream position recorded by `nonws`, line breaks
                            ## and at the `/` that opens a comment
    depthLimit: int         ## current nesting depth (a counter, despite the
                            ## name), compared with `conf.nestedDepthLimit`
2018-12-17 23:01:06 +00:00
{.push gcsafe, raises: [].}
2023-12-18 04:05:12 +00:00
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
template error(error: JsonErrorKind) {.dirty.} =
  # Dirty on purpose: `lex` is whatever that name denotes at the call site,
  # and the `return` leaves the routine that invoked the template.
  lex.err = error
  return

template error(error: JsonErrorKind, retVal: int) {.dirty.} =
  # Same as above, for routines that have to hand back an `int`.
  lex.err = error
  return retVal

template error(lex: JsonLexer, error: JsonErrorKind, action: untyped) {.dirty.} =
  # Variant for the parse templates: record the error on the given lexer and
  # then run the caller-supplied `action` (typically something that exits).
  lex.err = error
  action
2018-12-17 23:01:06 +00:00
2023-12-18 04:05:12 +00:00
template ok(lex: JsonLexer): bool =
  # True as long as no error has been recorded on the lexer.
  (lex.err == errNone)
2018-12-17 23:01:06 +00:00
2023-12-18 04:05:12 +00:00
template readable(lex: JsonLexer): bool =
  # Forwards to `inputs.readable` on the underlying stream.
  inputs.readable(lex.stream)
2018-12-17 23:01:06 +00:00
2023-12-18 04:05:12 +00:00
template peek(lex: JsonLexer): char =
  # Look at the next stream item as a `char`, via `inputs.peek`.
  char(inputs.peek(lex.stream))
Lazy JSON parser (#42) * Proper error handling when parsed number exceeds uint64 details: Returns an "errNonPortableInt" error * need legacy flag for unit tests * lazy numeric token parser why: Numeric data may have a custom format. In particular,numeric data may be Uint256 which is not a JSON standard and might lead to an overflow. details: Numeric values are assigned a preliminary token type tkNumeric without being fully parsed. This can be used to insert a custom parser. Otherwise the value is parsed implicitly when querying/fetching the token type. + tok: replaced by getter tok() resolving lazy stuff (if necessary) + tokKind: current type without auto-resolving This lazy scheme could be extended to other custom types as long as the first token letter determines the custom type. * activate lazy parsing in reader howto: + no code change if a custom reader refers to an existing reader type FancyInt = distinct int proc readValue(reader: var JsonReader, value: var FancyInt) = value = reader.readValue(int).FancyInt + bespoke reader for cusom parsing type FancyUint = distinct uint proc readValue(reader: var JsonReader, value: var FancyUint) = if reader.lexer.lazyTok == tkNumeric: var accu: FancyUint reader.lexer.customIntValueIt: accu = accu * 10 + it.u256 value = accu elif reader.lexer.tok == tkString: value = reader.lexer.strVal.parseUint.FancyUint ... reader.lexer.next + full code explanation at json_serialisation/reader.readValue() * Add lazy parsing for customised string objects why: This allows parsing large or specialised strings without storing it in the lexer state descriptor. details: Similar logic applies as for the cusomised number parser. For mostly all practical cases, a DSL template is available serving as wrapper around the character/byte item processor code. * fix typo in unit test
2022-05-05 16:33:40 +00:00
2023-12-18 04:05:12 +00:00
template read(lex: JsonLexer): char =
  # Fetch the next stream item as a `char`, via `inputs.read`.
  char(inputs.read(lex.stream))
template advance(lex: JsonLexer) =
  # Forwards to `inputs.advance` on the underlying stream.
  inputs.advance(lex.stream)
template checkForUnexpectedEof(lex: JsonLexer) =
  # Bail out of the calling routine with `errUnexpectedEof` when the stream
  # has nothing left to read.
  if not lex.readable:
    error errUnexpectedEof
template requireNextChar(lex: JsonLexer): char =
  # Read one character; if the stream is exhausted the calling routine is
  # left with `errUnexpectedEof` recorded instead.
  lex.checkForUnexpectedEof()
  lex.read()
template enterNestedStructure(lex: JsonLexer, action: untyped) {.dirty.} =
  # Bump the nesting counter. When a positive `nestedDepthLimit` is configured
  # and the counter now exceeds it, record `errNestedDepthLimit` and run
  # `action`. The counter is not rolled back on that path.
  inc lex.depthLimit
  if lex.conf.nestedDepthLimit > 0 and
      lex.depthLimit > lex.conf.nestedDepthLimit:
    lex.err = errNestedDepthLimit
    action
template exitNestedStructure(lex: JsonLexer) =
  # Counterpart of `enterNestedStructure`: one nesting level less.
  dec lex.depthLimit
2023-12-18 04:05:12 +00:00
proc handleLF(lex: var JsonLexer) =
  ## Step over the current character (the caller has seen a line feed), count
  ## the new line and restart line/token bookkeeping at the new position.
  lex.advance
  inc lex.line
  lex.lineStartPos = lex.stream.pos
  lex.tokenStart = lex.stream.pos
proc isDigit(c: char): bool =
  ## True for the ASCII decimal digits `'0'` through `'9'`.
  result = c in {'0'..'9'}
template eatDigitAndPeek(body: untyped): char =
  # Consume the current character and yield the one after it. If the stream
  # ends right there, `body` runs first; every use in this file passes a body
  # that leaves the calling routine. `lex` is taken from the call site.
  lex.advance
  if not lex.readable:
    body
  lex.peek()
proc skipWhitespace(lex: var JsonLexer)
    {.gcsafe, raises: [IOError].} =
  ## Consume blanks, tabs, line breaks and (when `allowComments` is set)
  ## `//` and `/* */` comments, keeping `line`, `lineStartPos` and
  ## `tokenStart` up to date. Stops at the first other character or at end of
  ## input.
  ##
  ## Errors recorded: `errCommentNotAllowed` for a `/` without the flag,
  ## `errCommentExpected` when `/` is not followed by `/` or `*`, and
  ## `errUnexpectedEof` when input ends right after the opening `/` or right
  ## after a `*` inside a block comment. Input that ends anywhere else inside
  ## a comment body makes the proc return without recording an error.

  template handleCR =
    # A template rather than a proc so that `return` exits `skipWhitespace`.
    # Treats "\r\n" as a single line break.
    lex.advance
    if not lex.readable: return
    if lex.peek() == '\n': lex.advance
    inc lex.line
    lex.lineStartPos = lex.stream.pos
    lex.tokenStart = lex.stream.pos

  template handleComment =
    # A template rather than a proc so that `return` exits `skipWhitespace`.
    lex.advance                       # the leading '/'
    lex.checkForUnexpectedEof()
    case lex.peek()
    of '/':
      # Line comment: runs up to and including the next line break.
      lex.advance
      while true:
        if not lex.readable: return
        case lex.peek()
        of '\r':
          handleCR()
          break
        of '\n':
          lex.handleLF()
          break
        else:
          lex.advance
    of '*':
      # Block comment: runs up to the closing "*/", counting lines on the way.
      lex.advance
      while true:
        if not lex.readable: return
        case lex.peek()
        of '\r':
          handleCR()
        of '\n':
          lex.handleLF()
        of '*':
          lex.advance
          lex.checkForUnexpectedEof()
          # Anything but '/' is left in place and looked at again by the loop.
          if lex.peek() == '/':
            lex.advance
            break
        else:
          lex.advance
    else:
      error errCommentExpected

  while lex.readable:
    case lex.peek()
    of ' ', '\t':
      lex.advance
    of '\r':
      handleCR()
    of '\n':
      lex.handleLF()
    of '/':
      lex.tokenStart = lex.stream.pos
      if JsonReaderFlag.allowComments notin lex.flags:
        error errCommentNotAllowed
      handleComment()
    else:
      break
proc next(lex: var JsonLexer): char {.gcsafe, raises: [IOError].} =
  ## Read and return the next character of the stream associated with the
  ## lexer. When nothing is readable the default `char` value is returned
  ## and nothing is consumed.
  if lex.readable():
    result = lex.read()
2020-04-13 14:01:49 +00:00
func hexCharValue(c: char): int =
  ## Numeric value (0..15) of a hexadecimal digit in either letter case,
  ## or -1 when `c` is not a hexadecimal digit.
  if c in {'0'..'9'}:
    ord(c) - ord('0')
  elif c in {'a'..'f'}:
    ord(c) - ord('a') + 10
  elif c in {'A'..'F'}:
    ord(c) - ord('A') + 10
  else:
    -1
2023-12-18 04:05:12 +00:00
proc scanHexRune(lex: var JsonLexer): int
    {.gcsafe, raises: [IOError].} =
  ## Read exactly four hexadecimal digits and return the 16-bit value they
  ## spell. Records `errHexCharExpected` on a non-hex character and
  ## `errUnexpectedEof` on premature end of input; in both cases the proc
  ## returns early with whatever had been accumulated.
  for _ in 0 ..< 4:
    let nibble = hexCharValue lex.requireNextChar()
    if nibble == -1: error errHexCharExpected
    result = (result shl 4) or nibble
2018-12-17 23:01:06 +00:00
2023-12-18 04:05:12 +00:00
proc scanHex(lex: var JsonLexer): int
    {.gcsafe, raises: [IOError].} =
  ## Read exactly two hexadecimal digits and return the byte value they spell.
  ## Records `errHexCharExpected` on a non-hex character and
  ## `errUnexpectedEof` on premature end of input.
  result = hexCharValue lex.requireNextChar()       # high nibble
  if result == -1: error errHexCharExpected
  let lowNibble = hexCharValue lex.requireNextChar()
  if lowNibble == -1: error errHexCharExpected
  result = (result shl 4) or lowNibble
template requireMoreNumberChars() =
  # Leave the calling routine with `errNumberExpected` if the input ends here.
  # `lex` is taken from the call site.
  if not lex.readable:
    error errNumberExpected
2023-12-18 04:05:12 +00:00
proc scanSign(lex: var JsonLexer): JsonSign
    {.gcsafe, raises: [].} =
  ## Consume an optional leading `-` or `+` and report it as `JsonSign.Neg`
  ## or `JsonSign.Pos`; any other character is left in place and
  ## `JsonSign.None` is returned.
  ##
  ## The stream is peeked without a readability check, and no error is ever
  ## recorded here: verifying that more number characters follow the sign is
  ## left to the caller.
  result = JsonSign.None
  case lex.peek()
  of '-':
    lex.advance
    result = JsonSign.Neg
  of '+':
    lex.advance
    result = JsonSign.Pos
  else:
    discard
proc scanSign[T](lex: var JsonLexer, val: var T, onlyNeg = false)
    {.gcsafe, raises: [].} =
  ## Consume an optional sign and deliver it to `val`:
  ##
  ## * `string`   – the sign character (`-` or `+`) is appended
  ## * `JsonSign` – `val` is set to the detected sign (`None` if absent)
  ## * `JsonVoid` – the sign is consumed and dropped
  ##
  ## With `onlyNeg` set, a `+` sign is accepted only when the
  ## `integerPositiveSign` flag is active; otherwise `errIntPosSign` is
  ## recorded and the proc returns without touching `val` (the `+` has
  ## already been consumed at that point).
  when T isnot (string or JsonVoid or JsonSign):
    # The diagnostic names this routine; it used to say `scanNumber`.
    {.fatal: "`scanSign` only accepts `string` or `JsonVoid` or `JsonSign`".}

  let sign = lex.scanSign()

  if onlyNeg and sign == JsonSign.Pos:
    if integerPositiveSign notin lex.flags:
      # `error` returns from this proc, so no further `lex.ok` check is needed.
      error errIntPosSign

  when T is string:
    if sign == JsonSign.Neg: val.add '-'
    elif sign == JsonSign.Pos: val.add '+'
  elif T is JsonSign:
    val = sign
  elif T is JsonVoid:
    discard
proc scanInt[T](lex: var JsonLexer, val: var T,
                limit: int,
                intPart: bool = true,
                errKind = errIntDigitLimit): int
    {.gcsafe, raises: [IOError].} =
  ## Consume a run of decimal digits and return the number of digits consumed.
  ##
  ## `val` receives the digits depending on `T`:
  ##
  ## * `uint64`   – the numeric value is accumulated
  ## * `string`   – each digit character is appended
  ## * `JsonVoid` – digits are consumed and dropped
  ##
  ## `limit` is the maximum number of digits accepted. `intPart` enables the
  ## leading-zero check. `errKind` is the error recorded when `limit` is
  ## exceeded.
  ##
  ## Errors (`lex.err`):
  ##
  ## * `errNumberExpected` – the current character is not a digit (returns 0)
  ## * `errLeadingZero`    – `intPart` is set and a `0` is followed by another
  ##   digit (returns 1)
  ## * `errKind`           – more than `limit` digits
  ## * `errU64Overflow`    – `T is uint64` and the value does not fit; `val`
  ##   is set to `uint64.high`
  ##
  ## For the last two errors the returned count includes the digit that
  ## triggered the error, which is left unconsumed.
  ##
  ## The stream is peeked without a readability check, so the caller has to
  ## make sure a character is available.
  let first = lex.peek()
  if first.isDigit.not:
    error errNumberExpected, 0

  # Always possible to append `9` if `val` is not larger
  when T is uint64:
    const canAppendDigit9 = (uint64.high - 9) div 10
    val = uint64(ord(first) - ord('0'))
  elif T is string:
    val.add first
  elif T is JsonVoid:
    discard
  else:
    {.fatal: "`scanInt` only accepts `string` or `uint64` or `JsonVoid`".}

  # Number of digits taken so far; `first` is the only one at this point.
  var numDigits = 1

  var c = eatDigitAndPeek: return 1

  if first == '0' and c.isDigit and intPart:
    error errLeadingZero, 1

  while c.isDigit:
    # Count `c` before the checks so that `limit` is compared against the
    # total including this digit. Counting here, rather than after fetching
    # the following character, keeps the result exact when the run is ended
    # by a non-digit.
    inc numDigits
    if numDigits > limit:
      error errKind, numDigits

    when T is uint64:
      let lsDgt = uint64(ord(c) - ord('0'))
      if canAppendDigit9 < val and
          (uint64.high - lsDgt) div 10 < val:
        val = uint64.high
        error errU64Overflow, numDigits
      else:
        val = val * 10 + lsDgt
    elif T is string:
      val.add c

    # Fetch next digit
    c = eatDigitAndPeek: return numDigits

  numDigits
# ------------------------------------------------------------------------------
# Constructors
# ------------------------------------------------------------------------------
proc init*(T: type JsonLexer,
           stream: InputStream,
           flags: JsonReaderFlags = defaultJsonReaderFlags,
           conf: JsonReaderConf = defaultJsonReaderConf): T =
  ## Build a lexer over `stream`: line 1, no error, line start at position 0
  ## and no token seen yet (`tokenStart` is -1). The nesting counter is left
  ## at its default value.
  T(
    stream: stream,
    err: errNone,
    flags: flags,
    conf: conf,
    line: 1,
    lineStartPos: 0,
    tokenStart: -1,
  )
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------
func isErr*(lex: JsonLexer): bool =
  ## True once any error other than `errNone` has been recorded.
  result = lex.err != errNone
proc col*(lex: JsonLexer): int =
  ## Distance of the current stream position from the start of the current
  ## line.
  let here = lex.stream.pos
  here - lex.lineStartPos
proc tokenStartCol*(lex: JsonLexer): int =
  ## One-based column of the most recently recorded token start.
  let offset = lex.tokenStart - lex.lineStartPos
  1 + offset
proc nonws*(lex: var JsonLexer): char {.gcsafe, raises: [IOError].} =
  ## Skip whitespace (and comments, if allowed), mark the token start at the
  ## resulting position and return the character found there without
  ## consuming it. At end of input the default `char` value is returned.
  ## `lex.err` may have been set by the skipping step.
  lex.skipWhitespace()
  lex.tokenStart = lex.stream.pos
  if lex.readable:
    result = lex.peek()
proc scanBool*(lex: var JsonLexer): bool {.gcsafe, raises: [IOError].} =
  ## Consume the literal `true` or `false` and return its value. Anything
  ## else records `errInvalidBool`; characters read up to and including the
  ## mismatch stay consumed. The first character is peeked without a
  ## readability check.
  let lead = lex.peek
  if lead == 't':
    lex.advance
    # The comparisons short-circuit, so reading stops at the first mismatch.
    if lex.next != 'r' or
       lex.next != 'u' or
       lex.next != 'e':
      error errInvalidBool
    result = true
  elif lead == 'f':
    lex.advance
    if lex.next != 'a' or
       lex.next != 'l' or
       lex.next != 's' or
       lex.next != 'e':
      error errInvalidBool
    result = false
  else:
    error errInvalidBool
proc scanNull*(lex: var JsonLexer) {.gcsafe, raises: [IOError].} =
  ## Consume the literal `null`; anything else records `errInvalidNull`.
  ## The first character is peeked without a readability check.
  if lex.peek != 'n':
    error errInvalidNull

  lex.advance
  # The comparisons short-circuit, so reading stops at the first mismatch.
  if lex.next != 'u' or
     lex.next != 'l' or
     lex.next != 'l':
    error errInvalidNull
2018-12-17 23:01:06 +00:00
2023-12-18 04:05:12 +00:00
proc scanNumber*[T](lex: var JsonLexer, val: var T)
    {.gcsafe, raises: [IOError].} =
  ## Scan a number: optional sign, integer part, optional fraction and
  ## optional exponent.
  ##
  ## * `T is string`     – the scanned characters are appended to `val`
  ## * `T is JsonVoid`   – the number is consumed and dropped
  ## * `T is JsonNumber` – `sign`, `integer`, `fraction`, `expSign` and
  ##   `exponent` of `val` are filled in
  ##
  ## A `+` sign on the number needs the `integerPositiveSign` flag, a number
  ## starting with `.` needs `leadingFraction`. Digit counts are bounded by
  ## `conf.integerDigitsLimit`, `conf.fractionDigitsLimit` and
  ## `conf.exponentDigitsLimit`. On failure `lex.err` is set and the proc
  ## returns at once; `val` may then hold a partial result.

  when T isnot (string or JsonVoid or JsonNumber):
    {.fatal: "`scanNumber` only accepts `string` or `JsonVoid` or `JsonNumber`".}

  # --- sign -----------------------------------------------------------------
  when T is JsonNumber:
    lex.scanSign(val.sign, true)
  else:
    lex.scanSign(val, true)

  if not lex.ok: return
  requireMoreNumberChars()

  var
    c = lex.peek()
    fracLen = 0
    sawDot = false

  # --- integer part and decimal point -----------------------------------------
  if c.isDigit:
    when T is string or T is JsonVoid:
      discard lex.scanInt(val, lex.conf.integerDigitsLimit)
    elif T is JsonNumber:
      discard lex.scanInt(val.integer, lex.conf.integerDigitsLimit)

    if not lex.ok: return
    if not lex.readable: return
    c = lex.peek()
    if c == '.':
      sawDot = true
      when T is string:
        val.add '.'
      # Skip the dot; input ending right after it is an empty fraction.
      c = eatDigitAndPeek:
        error errEmptyFraction
  elif c == '.':
    # No integer part at all, e.g. ".5"
    sawDot = true
    if leadingFraction notin lex.flags:
      error errLeadingFraction
    when T is string:
      val.add '.'
    lex.advance
    requireMoreNumberChars()
    c = lex.peek()
  else:
    error errNumberExpected

  # --- fraction digits --------------------------------------------------------
  if c.isDigit:
    when T is string or T is JsonVoid:
      fracLen = lex.scanInt(val, lex.conf.fractionDigitsLimit,
                            false, errFracDigitLimit)
    elif T is JsonNumber:
      fracLen = lex.scanInt(val.fraction, lex.conf.fractionDigitsLimit,
                            false, errFracDigitLimit)
    if not lex.ok: return

  if sawDot and fracLen == 0:
    error errEmptyFraction

  # --- exponent -----------------------------------------------------------------
  if not lex.readable: return
  c = lex.peek()
  if c in {'E', 'e'}:
    when T is string:
      val.add c
    lex.advance
    requireMoreNumberChars()

    # Either sign is fine on the exponent, hence no `onlyNeg` here.
    when T is JsonNumber:
      lex.scanSign(val.expSign)
    else:
      lex.scanSign(val)

    if not lex.ok: return
    requireMoreNumberChars()
    if not isDigit lex.peek():
      error errNumberExpected

    when T is string or T is JsonVoid:
      discard lex.scanInt(val, lex.conf.exponentDigitsLimit,
                          false, errExpDigitLimit)
    elif T is JsonNumber:
      discard lex.scanInt(val.exponent, lex.conf.exponentDigitsLimit,
                          false, errExpDigitLimit)
proc scanString*[T](lex: var JsonLexer, val: var T, limit: int)
    {.gcsafe, raises: [IOError].} =
  ## scanString only accepts `string` or `JsonVoid`.
  ##
  ## Consumes a quoted string. The current character is skipped as the
  ## opening quote without being inspected, and scanning runs up to and
  ## including the closing `"`. With `T is string` the decoded content is
  ## appended to `val`; with `JsonVoid` it is only measured.
  ##
  ## A positive `limit` bounds the decoded length: one unit per plain or
  ## escaped character, and the UTF-8 byte length for each `\u` escape.
  ## Exceeding it sets `lex.err = errStringLengthLimit`.
  ##
  ## Other errors: `errUnexpectedEof`, `errHexCharExpected`, `errEscapeHex`
  ## (`\x` without the `escapeHex` flag), `errRelaxedEscape` (unknown escape
  ## without the `relaxedEscape` flag), `errOrphanSurrogate` (high surrogate
  ## not followed by `\u`), `errEscapeControlChar` (raw control character)
  ## and `errQuoteExpected` (raw CR or LF inside the string).
  var used = 0

  template appendVal(item: untyped) =
    # Account for one character and, for `string`, store it.
    when T is string:
      if limit > 0 and used + 1 > limit:
        error errStringLengthLimit
      val.add item
      inc used
    elif T is JsonVoid:
      if limit > 0 and used + 1 > limit:
        error errStringLengthLimit
      inc used
      discard item
    else:
      {.fatal: "`scanString` only accepts `string` or `JsonVoid`".}

  template appendRune(encoded: untyped) =
    # Account for an already UTF-8 encoded rune and, for `string`, store it.
    when T is string:
      if limit > 0 and used + encoded.len > limit:
        error errStringLengthLimit
      val.add encoded
      inc(used, encoded.len)
    else:
      if limit > 0 and used + encoded.len > limit:
        error errStringLengthLimit
      inc(used, encoded.len)

  lex.advance # opening quote

  while true:
    var ch = lex.requireNextChar()
    case ch
    of '"':
      break
    of '\\':
      ch = lex.requireNextChar()
      case ch
      of '\\', '"', '\'', '/':
        appendVal ch
      of 'b':
        appendVal '\b'
      of 'f':
        appendVal '\f'
      of 'n':
        appendVal '\n'
      of 'r':
        appendVal '\r'
      of 't':
        appendVal '\t'
      of 'v':
        appendVal '\x0B'
      of '0':
        appendVal '\x00'
      of 'x':
        if escapeHex notin lex.flags:
          error errEscapeHex
        let byteVal = lex.scanHex
        if not lex.ok: return
        appendVal byteVal.char
      of 'u':
        var codePoint = lex.scanHexRune()
        if not lex.ok: return
        # Deal with surrogates
        if (codePoint and 0xfc00) == 0xd800:
          if lex.requireNextChar() != '\\': error errOrphanSurrogate
          if lex.requireNextChar() != 'u': error errOrphanSurrogate
          let trail = lex.scanHexRune()
          if not lex.ok: return
          # NOTE(review): when `trail` is not a low surrogate it is dropped
          # and the lone high surrogate is encoded on its own - confirm this
          # is intended.
          if (trail and 0xfc00) == 0xdc00:
            codePoint = 0x10000 + (((codePoint - 0xd800) shl 10) or (trail - 0xdc00))
        let utf8 = toUTF8(Rune(codePoint))
        appendRune utf8
      else:
        if relaxedEscape notin lex.flags:
          error errRelaxedEscape
        else:
          appendVal ch
    of '\x00'..'\x09', '\x0B', '\x0C', '\x0E'..'\x1F':
      error errEscapeControlChar
    of '\r', '\n':
      error errQuoteExpected
    else:
      appendVal ch
2018-12-17 23:01:06 +00:00
# Forward declaration: `scanObject` and `scanArray` below call `scanValue`,
# whose body in turn calls both of them.
proc scanValue*[T](lex: var JsonLexer, val: var T)
    {.gcsafe, raises: [IOError].}
2018-12-17 23:01:06 +00:00
2023-12-18 04:05:12 +00:00
template parseObjectImpl*(lex: JsonLexer,
                          actionInitial: untyped,
                          actionClosing: untyped,
                          actionComma: untyped,
                          actionKey: untyped,
                          actionValue: untyped,
                          actionError: untyped) =
  ## Drive the scan of a `{ ... }` object and call back into the supplied
  ## code blocks:
  ##
  ## * `actionInitial` – before the current character (the opening brace) is
  ##   skipped
  ## * `actionKey`     – with the lexer at the `"` opening a member name
  ## * `actionValue`   – right after the `:` has been consumed
  ## * `actionComma`   – after a `,` that is not directly followed by `}`
  ## * `actionClosing` – after the closing `}` has been consumed
  ## * `actionError`   – whenever `lex.err` has been set; the code following
  ##   each use relies on this block leaving the enclosing routine
  ##
  ## Enforces `conf.nestedDepthLimit` and `conf.objectMembersLimit`, and
  ## permits a trailing comma only with the `trailingComma` flag.

  lex.enterNestedStructure(actionError)

  actionInitial
  lex.advance

  var
    numElem = 0         # members started so far
    prevComma = false   # the last structural token was a ','

  while true:
    var next = lex.nonws()
    if not lex.ok: actionError
    if not lex.readable:
      error(lex, errCurlyRiExpected, actionError)

    case next
    of '}':
      lex.advance
      actionClosing
      break

    of ',':
      if prevComma:
        error(lex, errValueExpected, actionError)

      if numElem == 0:
        error(lex, errMissingFirstElement, actionError)

      prevComma = true
      lex.advance
      next = lex.nonws()
      if not lex.ok: actionError

      # A '}' straight after the comma is a trailing comma.
      if next == '}':
        if trailingComma in lex.flags:
          lex.advance
          actionClosing
          break
        else:
          error(lex, errTrailingComma, actionError)
      else:
        actionComma

    of '"':
      if numElem >= 1 and not prevComma:
        error(lex, errCommaExpected, actionError)

      prevComma = false
      inc numElem
      if lex.conf.objectMembersLimit > 0 and
          numElem > lex.conf.objectMembersLimit:
        error(lex, errObjectMembersLimit, actionError)

      actionKey
      if not lex.ok: actionError

      next = lex.nonws()
      if not lex.ok: actionError
      if next != ':':
        error(lex, errColonExpected, actionError)

      lex.advance
      actionValue
      if not lex.ok: actionError

    else:
      error(lex, errStringExpected, actionError)

  lex.exitNestedStructure()
proc scanObject*[T](lex: var JsonLexer, val: var T)
    {.gcsafe, raises: [IOError].} =
  ## Scan a complete JSON object by feeding `parseObjectImpl`.
  ##
  ## * `T is string`         – the object text is appended to `val`, with
  ##   whitespace dropped and strings in decoded form
  ## * `T is JsonVoid`       – the object is consumed and dropped
  ## * `T is JsonObjectType` – members are stored as `val[key] = value`;
  ##   values that come back nil are not stored
  ##
  ## On failure `lex.err` is set and the proc returns immediately.

  when T isnot (string or JsonVoid or JsonObjectType):
    {.fatal: "`scanObject` only accepts `string` or `JsonVoid` or `JsonObjectType`".}

  parseObjectImpl(lex):
    # initial action
    when T is string:
      val.add '{'

  do:
    # closing action
    when T is string:
      val.add '}'

  do:
    # comma action
    when T is string:
      val.add ','

  do:
    # key action
    when T is JsonVoid:
      lex.scanString(val, lex.conf.stringLengthLimit)
    elif T is string:
      val.add '"'
      lex.scanString(val, lex.conf.stringLengthLimit)
      if lex.ok: val.add '"'
    else:
      # Stays visible to the value action, which shares this scope.
      var memberName: string
      lex.scanString(memberName, lex.conf.stringLengthLimit)

  do:
    # value action
    when T is string:
      val.add ':'
      lex.scanValue(val)
    elif T is JsonVoid:
      lex.scanValue(val)
    else:
      var memberVal: valueType(T)
      lex.scanValue(memberVal)
      if not memberVal.isNil:
        val[memberName] = memberVal

  do:
    # error action
    return
2018-12-17 23:01:06 +00:00
2023-12-18 04:05:12 +00:00
template parseArrayImpl*(lex: JsonLexer,
                         numElem: untyped,
                         actionInitial: untyped,
                         actionClosing: untyped,
                         actionComma: untyped,
                         actionValue: untyped,
                         actionError: untyped) =
  ## Drive the scan of a `[ ... ]` array and call back into the supplied
  ## code blocks:
  ##
  ## * `actionInitial` – before the current character (the opening bracket)
  ##   is skipped
  ## * `actionValue`   – with the lexer at the first character of an element;
  ##   the injected variable named by `numElem` holds the number of elements
  ##   completed so far
  ## * `actionComma`   – after a `,` that is not directly followed by `]`
  ## * `actionClosing` – after the closing `]` has been consumed
  ## * `actionError`   – whenever `lex.err` has been set; the code following
  ##   each use relies on this block leaving the enclosing routine
  ##
  ## Enforces `conf.nestedDepthLimit` and `conf.arrayElementsLimit`, and
  ## permits a trailing comma only with the `trailingComma` flag.

  lex.enterNestedStructure(actionError)

  actionInitial
  lex.advance

  var
    numElem {.inject.} = 0   # elements completed so far, visible to actions
    prevComma = false        # the last structural token was a ','

  while true:
    var next = lex.nonws()
    if not lex.ok: actionError
    if not lex.readable:
      error(lex, errBracketRiExpected, actionError)

    case next
    of ']':
      lex.advance
      actionClosing
      break

    of ',':
      if prevComma:
        error(lex, errValueExpected, actionError)

      if numElem == 0:
        # This happens with "[, 1, 2]", for instance
        error(lex, errMissingFirstElement, actionError)

      prevComma = true
      lex.advance
      next = lex.nonws()
      if not lex.ok: actionError

      # Check that this is not a terminating comma (like in
      #  "[b,]")
      if next == ']':
        if trailingComma notin lex.flags:
          error(lex, errTrailingComma, actionError)
        lex.advance
        actionClosing
        break
      else:
        actionComma

    else:
      if numElem >= 1 and not prevComma:
        error(lex, errCommaExpected, actionError)

      if lex.conf.arrayElementsLimit > 0 and
          numElem + 1 > lex.conf.arrayElementsLimit:
        error(lex, errArrayElementsLimit, actionError)

      prevComma = false
      actionValue
      if not lex.ok: actionError
      inc numElem

  lex.exitNestedStructure()
proc scanArray*[T](lex: var JsonLexer, val: var T)
    {.gcsafe, raises: [IOError].} =
  ## Scan a complete JSON array by feeding `parseArrayImpl`.
  ##
  ## * `T is string`            – the array text is appended to `val`, with
  ##   whitespace dropped and strings in decoded form
  ## * `T is JsonVoid`          – the array is consumed and dropped
  ## * `T is seq[JsonValueRef]` – `val` grows by one slot per element and
  ##   the element is scanned into that slot
  ##
  ## On failure `lex.err` is set and the proc returns immediately.

  when T isnot (string or JsonVoid or seq[JsonValueRef]):
    {.fatal: "`scanArray` only accepts `string` or `JsonVoid` or `seq[JsonValueRef]`".}

  parseArrayImpl(lex, idx) do:
    # initial action
    when T is string:
      val.add '['

  do:
    # closing action
    when T is string:
      val.add ']'

  do:
    # comma action
    when T is string:
      val.add ','

  do:
    # value action
    when T is (string or JsonVoid):
      lex.scanValue(val)
    else:
      # `idx` is the count of finished elements, i.e. the next free index.
      val.setLen(idx + 1)
      lex.scanValue(val[idx])

  do:
    # error action
    return
2018-12-17 23:01:06 +00:00
proc scanValue*[T](lex: var JsonLexer, val: var T)
    {.gcsafe, raises: [IOError].} =
  ## Scan one JSON value of any kind, chosen by the first non-whitespace
  ## character.
  ##
  ## * `T is string`       – the value's text is appended to `val`
  ## * `T is JsonVoid`     – the value is consumed and dropped
  ## * `T is JsonValueRef` – `val` is replaced by a fresh node of the
  ##   matching kind and filled in
  ##
  ## A character that starts no value records `errUnknownChar`. This also
  ## applies at end of input, where `nonws` yields the default `char`.

  when T isnot (string or JsonVoid or JsonValueRef):
    {.fatal: "`scanValue` only accepts `string` or `JsonVoid` or `JsonValueRef`".}

  let lead = lex.nonws()
  if not lex.ok: return

  case lead
  of '"':
    when T is JsonValueRef:
      val = T(kind: JsonValueKind.String)
      lex.scanString(val.strVal, lex.conf.stringLengthLimit)
    elif T is string:
      # The closing quote is added whether or not the scan succeeded.
      val.add '"'
      lex.scanString(val, lex.conf.stringLengthLimit)
      val.add '"'
    else:
      lex.scanString(val, lex.conf.stringLengthLimit)
    if not lex.ok: return

  of '+', '-', '.', '0'..'9':
    when T is JsonValueRef:
      val = T(kind: JsonValueKind.Number)
      lex.scanNumber(val.numVal)
    else:
      lex.scanNumber(val)
    if not lex.ok: return

  of '{':
    when T is JsonValueRef:
      val = T(kind: JsonValueKind.Object)
      lex.scanObject(val.objVal)
    else:
      lex.scanObject(val)
    if not lex.ok: return

  of '[':
    when T is JsonValueRef:
      val = T(kind: JsonValueKind.Array)
      lex.scanArray(val.arrayVal)
    else:
      lex.scanArray(val)
    if not lex.ok: return

  of 't', 'f':
    when T is JsonVoid:
      discard lex.scanBool()
    else:
      let flag = lex.scanBool()
      if not lex.ok: return
      when T is JsonValueRef:
        val = T(kind: JsonValueKind.Bool, boolVal: flag)
      elif T is string:
        if flag: val.add "true"
        else: val.add "false"

  of 'n':
    lex.scanNull()
    if not lex.ok: return
    when T is JsonValueRef:
      val = T(kind: JsonValueKind.Null)
    elif T is string:
      val.add "null"

  else:
    error errUnknownChar
2018-12-17 23:01:06 +00:00
2023-12-18 04:05:12 +00:00
proc tokKind*(lex: var JsonLexer): JsonValueKind
    {.gcsafe, raises: [IOError].} =
  ## Skip whitespace and classify the upcoming value by its first character
  ## without consuming it. Records `errUnknownChar` when that character
  ## starts no value; this includes end of input, where `nonws` yields the
  ## default `char`. On any error the default `JsonValueKind` is returned.
  let lead = lex.nonws()
  if not lex.ok: return

  case lead
  of '"':
    result = JsonValueKind.String
  of '+', '-', '.', '0'..'9':
    result = JsonValueKind.Number
  of '{':
    result = JsonValueKind.Object
  of '[':
    result = JsonValueKind.Array
  of 't', 'f':
    result = JsonValueKind.Bool
  of 'n':
    result = JsonValueKind.Null
  else:
    error errUnknownChar