From edda2577a36e9e0dfda56298d98c40560f521d77 Mon Sep 17 00:00:00 2001 From: Zahary Karadjov Date: Mon, 21 Jan 2019 19:40:14 +0200 Subject: [PATCH] Handle large unsigned values and add Portable JsonMode in the Lexer --- json_serialization/lexer.nim | 32 ++++++++++++++-------- json_serialization/reader.nim | 50 +++++++++++++++++++---------------- json_serialization/types.nim | 12 +++++++++ tests/test_serialization.nim | 10 +++++++ 4 files changed, 70 insertions(+), 34 deletions(-) create mode 100644 json_serialization/types.nim diff --git a/json_serialization/lexer.nim b/json_serialization/lexer.nim index 98cbd9e..7bea9f2 100644 --- a/json_serialization/lexer.nim +++ b/json_serialization/lexer.nim @@ -1,6 +1,7 @@ import strutils, unicode, - faststreams/input_stream, std_shims/objects + faststreams/input_stream, std_shims/objects, + types export input_stream @@ -36,9 +37,11 @@ type errUnexpectedEof = "unexpected end of file", errCommentExpected = "comment expected" errOrphanSurrogate = "unicode surrogates must be followed by another unicode character" + errNonPortableInt = "number is outside the range of portable values" JsonLexer* = object stream: AsciiStream + mode*: JsonMode line*: int lineStartPos: int @@ -47,7 +50,7 @@ type tok*: TokKind err*: JsonError - intVal*: int + intVal*: int64 floatVal*: float strVal*: string @@ -72,19 +75,20 @@ proc isDigit(c: char): bool {.inline.} = proc col*(lexer: JsonLexer): int = lexer.stream.pos - lexer.lineStartPos -proc init*(T: type JsonLexer, stream: AsciiStream): T = +proc init*(T: type JsonLexer, stream: AsciiStream, mode = defaultJsonMode): T = T(stream: stream, + mode: mode, line: 0, lineStartPos: 0, tokenStart: -1, tok: tkError, err: errNone, - intVal: 0, + intVal: int64 0, floatVal: 0'f, strVal: "") -proc init*(T: type JsonLexer, stream: ByteStream): auto = - init(JsonLexer, AsciiStream(stream)) +proc init*(T: type JsonLexer, stream: ByteStream, mode = defaultJsonMode): auto = + init(JsonLexer, AsciiStream(stream), mode) template error(error: JsonError) {.dirty.} = lexer.err = error @@ -98,6 +102,10 @@ template requireNextChar(): char = checkForUnexpectedEof() lexer.stream.read() +template checkForNonPortableInt(val: uint64) = + if lexer.mode == Portable and val > uint64(maxPortableInt): + error errNonPortableInt + proc scanHexRune(lexer: var JsonLexer): int = for i in 0..3: let hexValue = hexCharValue requireNextChar() @@ -241,13 +249,13 @@ proc scanSign(lexer: var JsonLexer): int = advance lexer.stream return 1 -proc scanInt(lexer: var JsonLexer): int = +proc scanInt(lexer: var JsonLexer): uint64 = var c = lexer.stream.peek() - result = ord(c) - ord('0') + result = uint64(ord(c) - ord('0')) c = eatDigitAndPeek() while c.isDigit: - result = result * 10 + (ord(c) - ord('0')) + result = result * 10 + uint64(ord(c) - ord('0')) c = eatDigitAndPeek() proc scanNumber(lexer: var JsonLexer) = @@ -262,7 +270,9 @@ proc scanNumber(lexer: var JsonLexer) = c = lexer.stream.peek() elif c.isDigit: lexer.tok = tkInt - lexer.intVal = lexer.scanInt() + let scannedValue = lexer.scanInt() + checkForNonPortableInt scannedValue + lexer.intVal = int(scannedValue) if lexer.stream.eof: return c = lexer.stream.peek() if c == '.': @@ -287,7 +297,7 @@ proc scanNumber(lexer: var JsonLexer) = error errNumberExpected let exponent = lexer.scanInt() - if exponent >= len(powersOfTen): + if exponent >= uint64(len(powersOfTen)): error errExponentTooLarge if sign > 0: diff --git a/json_serialization/reader.nim b/json_serialization/reader.nim index cc4553c..0326a97 100644 --- a/json_serialization/reader.nim +++ b/json_serialization/reader.nim @@ -1,7 +1,10 @@ import strutils, typetraits, macros, faststreams/input_stream, serialization/object_serialization, - lexer + types, lexer + +export + types type JsonReader* = object @@ -30,12 +33,12 @@ type encountedToken*: TokKind expectedToken*: ExpectedTokenCategory -proc init*(T: type JsonReader, stream: AsciiStream): T = - result.lexer = JsonLexer.init stream +proc init*(T: type JsonReader, stream: AsciiStream, mode = defaultJsonMode): T = + result.lexer = JsonLexer.init(stream, mode) result.lexer.next() -template init*(T: type JsonReader, stream: ByteStream): auto = - init JsonReader, AsciiStream(stream) +template init*(T: type JsonReader, stream: ByteStream, mode = defaultJsonMode): auto = + init JsonReader, AsciiStream(stream), mode proc setParsed[T: enum](e: var T, s: string) = e = parseEnum[T](s) @@ -44,32 +47,32 @@ proc assignLineNumber(ex: ref JsonReaderError, r: JsonReader) = ex.line = r.lexer.line ex.col = r.lexer.col -proc unexpectedToken(r: JsonReader, expected: ExpectedTokenCategory) = +proc raiseUnexpectedToken(r: JsonReader, expected: ExpectedTokenCategory) = var ex = new UnexpectedToken ex.assignLineNumber(r) ex.encountedToken = r.lexer.tok ex.expectedToken = expected raise ex -proc requireToken(r: JsonReader, tk: TokKind) = - if r.lexer.tok != tk: - r.unexpectedToken case tk - of tkString: etString - of tkInt: etInt - of tkComma: etComma - of tkBracketRi: etBracketRi - of tkBracketLe: etBracketLe - of tkCurlyRi: etCurrlyRi - of tkCurlyLe: etCurrlyLe - else: (assert false; etBool) - -proc unexpectedField(r: JsonReader, fieldName, deserializedType: cstring) = +proc raiseUnexpectedField(r: JsonReader, fieldName, deserializedType: cstring) = var ex = new UnexpectedField ex.assignLineNumber(r) ex.encounteredField = fieldName ex.deserializedType = deserializedType raise ex +proc requireToken(r: JsonReader, tk: TokKind) = + if r.lexer.tok != tk: + r.raiseUnexpectedToken case tk + of tkString: etString + of tkInt: etInt + of tkComma: etComma + of tkBracketRi: etBracketRi + of tkBracketLe: etBracketLe + of tkCurlyRi: etCurrlyRi + of tkCurlyLe: etCurrlyLe + else: (assert false; etBool) + proc skipToken(r: var JsonReader, tk: TokKind) = r.requireToken tk r.lexer.next() @@ -88,7 +91,7 @@ proc readImpl(r: var JsonReader, value: var auto) = case tok of tkTrue: value = true of tkFalse: value = false - else: r.unexpectedToken etBool + else: r.raiseUnexpectedToken etBool r.lexer.next() elif value is enum: @@ -100,7 +103,7 @@ proc readImpl(r: var JsonReader, value: var auto) = # TODO: validate that the value is in range value = type(value)(r.lexer.intVal) else: - r.unexpectedToken etEnum + r.raiseUnexpectedToken etEnum r.lexer.next() elif value is SomeInteger: @@ -113,7 +116,8 @@ proc readImpl(r: var JsonReader, value: var auto) = case tok of tkInt: value = float(r.lexer.intVal) of tkFloat: value = r.lexer.floatVal - else: r.unexpectedToken etNumber + else: + r.raiseUnexpectedToken etNumber r.lexer.next() elif value is seq: @@ -152,7 +156,7 @@ proc readImpl(r: var JsonReader, value: var auto) = reader(value, r) else: const typeName = typetraits.name(T) - r.unexpectedField(r.lexer.strVal, typeName) + r.raiseUnexpectedField(r.lexer.strVal, typeName) if r.lexer.tok == tkComma: r.lexer.next() else: diff --git a/json_serialization/types.nim b/json_serialization/types.nim new file mode 100644 index 0000000..5ed9ebc --- /dev/null +++ b/json_serialization/types.nim @@ -0,0 +1,12 @@ +type + JsonMode* = enum + Relaxed + Portable + + JsonError* = object of CatchableError + +const + defaultJsonMode* = JsonMode.Relaxed + minPortableInt* = -9007199254740991 # -2**53 + 1 + maxPortableInt* = 9007199254740991 # +2**53 - 1 + diff --git a/tests/test_serialization.nim b/tests/test_serialization.nim index dfc4986..a3ed8c2 100644 --- a/tests/test_serialization.nim +++ b/tests/test_serialization.nim @@ -40,3 +40,13 @@ suite "toJson tests": } """ + test "max unsigned value": + var uintVal = not uint64(0) + let jsonValue = Json.encode(uintVal) + check: + jsonValue == "18446744073709551615" + Json.decode(jsonValue, uint64) == uintVal + + expect JsonReaderError: + discard Json.decode(jsonValue, uint64, mode = Portable) +