# toml-serialization # Copyright (c) 2020 Status Research & Development GmbH # Licensed and distributed under either of # * MIT license: [LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT # * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) # at your option. This file may not be copied, modified, or distributed except according to those terms. import std/[strutils, strformat, options, tables, math, unicode], faststreams/inputs, types, private/utils type TomlLexer* = object stream*: InputStream line*: int col*: int flags*: TomlFlags TomlReaderError* = object of TomlError line*, col*: int TomlFieldReadingError* = object of TomlReaderError field*: string error*: ref TomlError TomlErrorKind* = enum errNone = "no error" errUnterminatedString = "unterminated string" errInvalidChar = "invalid character in string, ord: " errUnderscoreDigit = "underscore must be surrounded by digit" errForbidLeadingZero = "leading zeroes are not allowed in integers" errIntegerOverflow = "integer overflow" errIllegalChar = "illegal character " errInvalidBool = "invalid boolean value" errNegateUint = "cannot negate unsigned integer" errInvalidUnicode = "invalid Unicode codepoint, " errEmptyDecimalPart = "decimal part empty" errMLStringName = "multi line string not allowed in name" errMLStringEnum = "multi line string not allowed as enum value" errNoDoubleBracket = "\']]\' expected" errNoSingleBracket = "\']\' expected" errExponentTooLarge = "exponent too large" errUnknownIdent = "unknown identifier" errInvalidDateTime = "invalid date time" errDateTimeML = "date time cannot span multiple lines" errKeyNameMissing = "key name missing" errMissingFirstElement= "first array/table element missing" errUnterminatedArray = "unterminatedArray" errUnterminatedTable = "unterminatedTable" errRequireKey = "require key" errDoubleBracket = "double bracket not allowed" errDuplicateTableKey = "duplicate table key not allowed: \'" 
errKeyNotFound = "key not found: " errInvalidHex = "invalid hex escape, " errExpectDoubleBracket= "expect table array for given key" const CR = '\r' LF* = '\l' EOF* = '\0' TAB = '\t' SPC = ' ' invalidCommentChar = {'\x00'..'\x08', '\x0A'..'\x1F', '\x7F'} template readChar(s: InputStream): char = char inputs.read(s) proc lineInfo(lex: TomlLexer): (int, int) {.inline.} = (lex.line, lex.col) template tryFmt(expr: untyped): string = try: expr except CatchableError as err: err.msg method formatMsg*(err: ref TomlReaderError, filename: string): string {.gcsafe, raises: [Defect].} = tryFmt: fmt"{filename}({err.line}, {err.col}) Error while reading TOML file: {err.msg}" method formatMsg*(err: ref TomlFieldReadingError, filename: string): string {.gcsafe, raises: [Defect].} = err.error.formatMsg(filename) proc newTomlError*(line, col: int, msg: string): ref TomlError = result = newException(TomlError, "(" & $line & ", " & $col & ")" & " " & msg) template raiseTomlErr*(li: (int, int), kind: TomlErrorKind) = raise newTomlError(li[0], li[1], $kind) template raiseTomlErr*(lex: TomlLexer, kind: TomlErrorKind) = raise newTomlError(lex.line, lex.col, $kind) template raiseTomlErr*(lex: TomlLexer, msg: string) = raise newTomlError(lex.line, lex.col, msg) template raiseInvalidChar*(lex: TomlLexer, c: char) = raiseTomlErr(lex, $errInvalidChar & $ord(c)) template raiseIllegalChar*(lex: TomlLexer, c: char) = raiseTomlErr(lex, $errIllegalChar & escape($c)) template raiseUnknownEscape*(lex: TomlLexer, c: char) = raiseTomlErr(lex, "unknown escape sequence \"\\" & c & "\"") template raiseUnexpectedValue*(lex: TomlLexer, s: string) = raiseTomlErr(lex, "Expected valid '" & s & "' value") template raiseUnexpectedField*(lex: TomlLexer, fieldName, typeName: string) = raiseTomlErr(lex, "Unexpected field '" & fieldName & "' while deserializing '" & typeName & "'") template raiseInvalidUnicode*(lex: TomlLexer, s: string) = raiseTomlErr(lex, $errInvalidUnicode & s) template raiseDigitCount*(lex: 
TomlLexer, expected, actual: int) = raiseTomlErr(lex, "Expected digits count '" & $expected & "' but got '" & $actual & "'") template raiseBadValue*(lex: TomlLexer, msg: string, value: SomeInteger) = raiseTomlErr(lex, msg & "(" & $value & ")") template raiseExpectChar*(lex: TomlLexer, c: char) = raiseTomlErr(lex, "expected \'" & escape($c) & "\'") template raiseNotTable(lex: TomlLexer, name: string) = raiseTomlErr(lex, "\"" & name & "\" is not a table") template raiseNotArray(lex: TomlLexer, name: string) = raiseTomlErr(lex, "\"" & name & "\" is not an array") template raiseKeyNotFound*(lex: TomlLexer, key: string) = raiseTomlErr(lex, $errKeyNotFound & "\'" & key & "\'") template raiseInvalidHex*(lex: TomlLexer, s: string) = raiseTomlErr(lex, $errInvalidHex & s) template raiseDuplicateTableKey*(lex: TomlLexer, s: string) = raiseTomlErr(lex, $errDuplicateTableKey & s & "\'") proc init*(T: type TomlLexer, stream: InputStream, flags: TomlFlags = {}): T = T(stream: stream, line: 1, col: 1, flags: flags ) proc next*(lex: var TomlLexer): char = ## Return the next available char from the stream associate with ## the parser lex, or EOF if there are no characters left. if not lex.stream.readable(): return EOF result = lex.stream.readChar() # Update the line and col number if result == LF: inc(lex.line) lex.col = 1 elif result != CR: inc(lex.col) template peek(): char = if not lex.stream.readable(): EOF else: lex.stream.peek().char template advance() = discard lex.next template advancePeek(): untyped = advance() peek() type LfSkipMode* = enum skipLf, skipNoLf proc nonws*(lex: var TomlLexer, skip: static[LfSkipMode]): char = ## Note: this procedure does *not* consider a newline as a ## "whitespace". Since newlines are often mandatory in TOML files ## (e.g. after a key/value specification), we do not want to miss ## them... 
const whitespaces = when skip == skipLf: {SPC, TAB, CR, LF} else: {SPC, TAB, CR} var next: char while true: next = peek if next == '#': # Skip the comment up to the newline, but do not jump over it while next != LF: if next == CR: next = advancePeek if next != LF: raiseIllegalChar(lex, next) else: break elif next in invalidCommentChar: raiseIllegalChar(lex, next) next = advancePeek if not lex.stream.readable: # rase case break if next notin whitespaces: break advance result = next type Leading {.pure.} = enum AllowZero, DenyZero func charTo(T: type, c: char): T {.inline.} = case c of {'0'..'9'}: result = T(c) - T('0') of {'a'..'f'}: result = T(c) - T('a') + T(10) of {'A'..'F'}: result = T(c) - T('A') + T(10) else: doAssert(false, "should never executed") func baseToDigits(base: NumberBase): set[char] {.inline.} = case base of base2: {'0', '1'} of base8: {'0'..'7'} of base10: strutils.Digits of base16: strutils.HexDigits proc scanUint[T](lex: var TomlLexer, value: var T, base: NumberBase, leading = Leading.AllowZero) = ## scanUint only accepts `string` or `uint64` or `TomlVoid` var next: char firstPos = true wasUnderscore = false when T is uint64: let baseNum = (case base of base2: 2'u64 of base8: 8'u64 of base10: 10'u64 of base16: 16'u64) let digits = baseToDigits(base) while true: wasUnderscore = next == '_' next = peek if next == '_': if firstPos or wasUnderscore: raiseTomlErr(lex, errUnderscoreDigit) advance continue if next notin digits: if wasUnderscore: raiseTomlErr(lex, errUnderscoreDigit) break advance if leading == Leading.DenyZero: if next == '0' and firstPos: # TOML specifications forbid this let secondChar = peek if secondChar in Digits: raiseTomlErr(lex, errForbidLeadingZero) when T is string: value.add next elif T is uint64: value = value * baseNum + charTo(T, next) elif T is TomlVoid: discard else: {.fatal: "`scanUint` only accepts `string` or `uint64` or `TomlVoid`".} firstPos = false proc scanDigits*[T](lex: var TomlLexer, value: var T, base: 
NumberBase, maxDigits = high(int)): int = ## scanUint only accepts `string` or `int` or `TomlVoid` var next: char when T is int: let baseNum = (case base of base2: 2 of base8: 8 of base10: 10 of base16: 16) let digits = baseToDigits(base) while true: next = peek if next notin digits: return inc result advance when T is string: value.add next elif T is int: value = value * baseNum + charTo(T, next) elif T is TomlVoid: discard else: {.fatal: "`scanDigits` only accepts `string` or `int` or `TomlVoid`".} if result == maxDigits: # consume the rest of digits while true: next = peek if next notin digits: break advance return proc scanEncoding[T](lex: var TomlLexer, value: var T): NumberBase = let next = lex.next case next: of 'b': scanUint(lex, value, base2) base2 of 'o': scanUint(lex, value, base8) base8 of 'x': scanUint(lex, value, base16) base16 else: raiseIllegalChar(lex, next) proc scanUnicode[T](lex: var TomlLexer, kind: char, res: var T) = when T isnot (string or TomlVoid): {.fatal: "`scanUnicode` only accepts `string` or `TomlVoid`".} var code: int let col = scanDigits(lex, code, base16) if kind == 'u' and col != 4: raiseInvalidUnicode(lex, "'u' must have 4 character value") if kind == 'U' and col != 8: raiseInvalidUnicode(lex, "'U' must have 8 character value") if code notin 0..0xD7FF and code notin 0xE000..0x10FFFF: raiseInvalidUnicode(lex, "must be a unicode scalar value") when T is string: res.add unicode.toUTF8(Rune(code)) proc scanHexEscape[T](lex: var TomlLexer, res: var T) = when T isnot (string or TomlVoid): {.fatal: "`scanHexEscape` only accepts `string` or `TomlVoid`".} if TomlHexEscape notin lex.flags: raiseInvalidHex(lex, "not supported by standard, please use `TomlHexEscape`") var code: int let col = scanDigits(lex, code, base16) if col != 2: raiseInvalidHex(lex, "'\\x' must have 2 character value") when T is string: res.add char(code) proc scanEscapeChar[T](lex: var TomlLexer, esc: char, res: var T) = when T isnot (string or TomlVoid): {.fatal: 
"`scanEscapeChar` only accepts `string` or `TomlVoid`".} when T is string: case esc of 'b': res.add "\b" of 't': res.add "\t" of 'n': res.add "\n" of 'f': res.add "\f" of 'r': res.add "\r" of '\'': res.add "\'" of '\"': res.add "\"" of '\\': res.add "\\" of 'x': scanHexEscape(lex, res) of 'u', 'U': scanUnicode(lex, esc, res) else: raiseUnknownEscape(lex, esc) else: case esc of 'b', 't', 'n', 'f', 'r', '\'', '\"', '\\': discard of 'x': scanHexEscape(lex, res) of 'u', 'U': scanUnicode(lex, esc, res) else: raiseUnknownEscape(lex, esc) func stringDelimiter(kind: StringType): char {.inline.} = result = (case kind of StringType.Basic: '\"' of StringType.Literal: '\'') proc scanMultiLineString[T](lex: var TomlLexer, res: var T, kind: static[StringType]) = when T isnot (string or TomlVoid): {.fatal: "`scanMultiLineString` only accepts `string` or `TomlVoid`".} ## This procedure parses strings enclosed within three consecutive ## sigle/double quotation marks. It assumes that all the quotation ## marks have already been consumed by the "lex" variable, which ## therefore is ready to read the first character of the string. const delimiter = stringDelimiter(kind) var isFirstChar = true next: char while true: next = peek # Skip the first newline, if it comes immediately after the # quotation marks if isFirstChar and (next == CR): isFirstChar = false next = advancePeek if next == LF: advance continue if isFirstChar and (next == LF): isFirstChar = false advance continue if next == delimiter: # Are we done? 
next = advancePeek if next == delimiter: next = advancePeek if next == delimiter: next = advancePeek # Done with this string if next == delimiter: advance when T is string: res.add delimiter return else: # Just got a double delimiter when T is string: res.add delimiter res.add delimiter continue else: # Just got a lone delimiter when T is string: res.add(delimiter) continue isFirstChar = false when kind == StringType.Basic: if next == '\\': # This can either be an escape sequence or a end-of-line char next = advancePeek if next in {LF, CR, SPC}: # We're at the end of a line: skip everything till the # next non-whitespace character while true: if next == LF: break elif next in {CR, SPC, TAB}: next = lex.next else: raiseIllegalChar(lex, next) while true: next = peek if next notin {CR, LF, SPC, TAB}: break advance continue else: # This is just an escape sequence (like "\t") lex.scanEscapeChar(lex.next, res) continue if next == EOF: raiseTomlErr(lex, errUnterminatedString) if ord(next) in {16, 31, 127}: raiseInvalidChar(lex, next) when T is string: res.add(next) advance proc scanSingleLineString[T](lex: var TomlLexer, res: var T, kind: static[StringType]) = when T isnot (string or TomlVoid): {.fatal: "`scanSingleLineString` only accepts `string` or `TomlVoid`".} ## This procedure parses strings enclosed within single/double ## quotation marks. It assumes that the quotation mark has already ## been consumed by the "lex" variable, which therefore is ready ## to read the first character of the string. 
const delimiter = stringDelimiter(kind) var next: char while true: next = lex.next if next == delimiter: break if next in {CR, LF, EOF}: raiseTomlErr(lex, errUnterminatedString) if ord(next) in {16, 31, 127}: raiseInvalidChar(lex, next) when kind == StringType.Basic: if next == '\\': next = lex.next lex.scanEscapeChar(next, res) continue when T is string: res.add(next) proc scanString*[T](lex: var TomlLexer, res: var T, kind: static[StringType]): bool = when T isnot (string or TomlVoid): {.fatal: "`scanString` only accepts `string` or `TomlVoid`".} ## This function assumes that "lex" has already consumed the ## first character (either \" or \' depends on `kind` param) ## returns true if multi line string const delimiter = stringDelimiter(kind) var next = peek if next == delimiter: next = advancePeek # We have two possibilities here: if next == delimiter: # (1) "long" multi-line strings. advance scanMultiLineString(lex, res, kind) return true # (2) the empty string else: scanSingleLineString(lex, res, kind) proc scanString*(lex: var TomlLexer, kind: static[StringType]): string = result = newStringOfCap(defaultStringCapacity) discard scanString(lex, result, kind) proc scanInt*[T](lex: var TomlLexer, value: var T): (Sign, NumberBase) = when T isnot (string or uint64): {.fatal: "`scanInt` only accepts `string` or `uint64`".} var next: char sign = Sign.None template zeroVal(): untyped = when T is string: value.add '0' (sign, base10) while true: next = peek case next: of '0': next = advancePeek if sign == Sign.None: if next in {'b', 'x', 'o'}: return (Sign.None, scanEncoding(lex, value)) else: case next: of strutils.Whitespace: return zeroVal() of strutils.Digits: raiseTomlErr(lex, errForbidLeadingZero) else: # else is a sole 0 return zeroVal() else: case next: of strutils.Whitespace: return zeroVal() else: # else is a sole 0 return zeroVal() of strutils.Digits - {'0'}: scanUint(lex, value, base10, Leading.DenyZero) break of '+': advance sign = Sign.Pos continue of '-': 
advance sign = Sign.Neg continue else: raiseIllegalChar(lex, next) break result = (sign, base10) proc scanName*[T](lex: var TomlLexer, res: var T) = when T isnot (string or TomlVoid): {.fatal: "`scanName` only accepts `string` or `TomlVoid`".} ## This parses the name of a key or a table var next = lex.nonws(skipNoLf) if next == '\"': advance if lex.scanString(res, StringType.Basic): raiseTomlErr(lex, errMLStringName) return elif next == '\'': advance if lex.scanString(res, StringType.Literal): raiseTomlErr(lex, errMLStringName) return while true: if (next in {'=', '.', '[', ']', EOF, SPC, TAB}): # Any of the above characters marks the end of the name break elif (next notin {'a'..'z', 'A'..'Z', '0'..'9', '_', '-'}): raiseIllegalChar(lex, next) else: when T is string: res.add(next) next = advancePeek proc scanKey*[T](lex: var TomlLexer, res: var T) = when T isnot (string or seq[string] or TomlVoid): {.fatal: "`scanKey` only accepts `string` or `seq[string]` or `TomlVoid`".} when T is seq[string]: var partName = newStringOfCap(defaultStringCapacity) var next: char while true: when T is (string or TomlVoid): lex.scanName(res) else: partName.setLen(0) lex.scanName(partName) res.add partName next = lex.nonws(skipNoLf) if next == '.': advance next = lex.nonws(skipNoLf) if next in {'\'', '\"', '-', '_', 'a'..'z', 'A'..'Z', '0'..'9'}: when T is string: res.add '.' 
else: raiseIllegalChar(lex, next) else: break type BracketType* {.pure.} = enum single, double proc scanTableName*[T](lex: var TomlLexer, res: var T): BracketType = when T isnot (string or seq[string]): {.fatal: "`scanTableName` only accepts `string` or `seq[string]`".} ## This code assumes that '[' has already been consumed var next = peek if next == '[': advance result = BracketType.double else: result = BracketType.single lex.scanKey(res) case lex.nonws(skipNoLf) of ']': if result == BracketType.double: next = advancePeek if next != ']': raiseTomlErr(lex, errNoDoubleBracket) # We must check that there is nothing else in this line advance next = lex.nonws(skipNoLf) if next notin {LF, EOF}: raiseIllegalChar(lex, next) advance else: raiseTomlErr(lex, errNoSingleBracket) proc scanBool*(lex: var TomlLexer): bool = var next = nonws(lex, skipLf) let li = lex.lineInfo # used for error messages case next of 't': advance # Is this "true"? if lex.next != 'r' or lex.next != 'u' or lex.next != 'e': raiseTomlErr(li, errInvalidBool) result = true of 'f': advance # Is this "false"? 
if lex.next != 'a' or lex.next != 'l' or lex.next != 's' or lex.next != 'e': raiseTomlErr(li, errInvalidBool) result = false else: raiseTomlErr(li, errInvalidBool) proc scanDecimalPart[T](lex: var TomlLexer, value: var T, sign: Sign) = ## `T` should be `Somefloat`, or `string`, or `TomlVoid` var next: char firstPos = true wasUnderscore = false when T is SomeFloat: var invPowerOfTen = 10.0 val = 0.0 while true: wasUnderscore = next == '_' next = peek if next == '_': if firstPos or wasUnderscore: raiseTomlErr(lex, errUnderscoreDigit) advance continue if next notin strutils.Digits: if wasUnderscore: raiseTomlErr(lex, errUnderscoreDigit) if firstPos: raiseTomlErr(lex, errEmptyDecimalPart) break when T is SomeFloat: val = val + T(int(next) - int('0')) / invPowerOfTen invPowerOfTen *= 10 elif T is string: value.add next elif T is TomlVoid: discard else: {.fatal: "`scanDecimalPart` only accepts `float` or `string` or `TomlVoid`".} firstPos = false advance when T is SomeFloat: if sign == Sign.Neg: value = value - val else: value = value + val proc scanExponent[T](lex: var TomlLexer, value: var T) = when T isnot (SomeFloat or string or TomlVoid): {.fatal: "`scanFrac` only accepts `float` or `string` or `TomlVoid`".} when T is SomeFloat: var exponent = 0'u64 sign = Sign.None var next = peek case next of '-': when T is string: value.add next elif T is SomeFloat: sign = Sign.Neg next = advancePeek if next notin strutils.Digits: raiseIllegalChar(lex, next) of '+': when T is string: value.add next elif T is SomeFloat: sign = Sign.Pos next = advancePeek if next notin strutils.Digits: raiseIllegalChar(lex, next) of strutils.Digits: discard else: raiseIllegalChar(lex, next) when T is SomeFloat: scanUint(lex, exponent, base10) else: scanUint(lex, value, base10) when T is SomeFloat: if exponent >= 308'u64: raiseTomlErr(lex, errExponentTooLarge) if sign == Sign.Neg: value = value / pow(10.0, exponent.float64) else: value = value * pow(10.0, exponent.float64) proc scanFrac[T](lex: var 
TomlLexer, value: var T, sign: Sign) = when T isnot (SomeFloat or string or TomlVoid): {.fatal: "`scanFrac` only accepts `float` or `string` or `TomlVoid`".} scanDecimalPart(lex, value, sign) var next = peek if next in {'e', 'E'}: advance when T is string: value.add next scanExponent(lex, value) proc addFrac*[T](lex: var TomlLexer, value: var T, sign: Sign) = when T is string: value.add '.' scanFrac(lex, value, sign) elif T is SomeFloat: scanFrac(lex, value, sign) elif T is TomlVoid: scanFrac(lex, value, sign) else: {.fatal: "`addFrac` only accepts `float` or `string` or `TomlVoid`".} proc scanFloat*[T](lex: var TomlLexer, value: var T): Sign = when T isnot (SomeFloat or string): {.fatal: "`scanFloat` only accepts float or string".} var sign = Sign.None next: char while true: next = peek case next of '-': advance when T is string: value.add next sign = Sign.Neg continue of '+': advance when T is string: value.add next sign = Sign.Pos continue of 'i': let li = lex.lineInfo advance if lex.next != 'n' or lex.next != 'f': raiseTomlErr(li, errUnknownIdent) when T is string: value.add "inf" else: value = Inf if sign == Sign.Neg: value = -Inf return sign of 'n': let li = lex.lineInfo advance if lex.next != 'a' or lex.next != 'n': raiseTomlErr(li, errUnknownIdent) when T is string: value.add "nan" else: value = NaN if sign == Sign.Neg: value = -NaN return sign of strutils.Digits: break else: raiseIllegalChar(lex, next) break when T is SomeFloat: var intPart = 0'u64 scanUint(lex, intPart, base10, Leading.DenyZero) value = T(intPart) if sign == Sign.Neg: value = -value else: scanUint(lex, value, base10, Leading.DenyZero) next = peek case next of '.': advance lex.addFrac(value, sign) of 'e', 'E': advance when T is string: value.add next lex.scanExponent(value) else: discard result = sign proc scanStrictNum[T](lex: var TomlLexer, res: var T, minVal, maxVal, count: int, msg: string) = when T isnot (int or string or TomlVoid): {.fatal: "`scanStrictNum` only accepts `int` or 
`string` or `TomlVoid`".} var val: int let parsed = scanDigits(lex, val, base10) if parsed != count: lex.raiseDigitCount(count, parsed) if val < minVal or val > maxVal: lex.raiseBadValue(msg, val) when T is string: res.add intToStr(val, count) elif T is int: res = val proc scanMinuteSecond*[T](lex: var TomlLexer, value: var T) = ## `scanTime` assume the two digits of hour already parsed when T isnot (TomlTime or string or TomlVoid): {.fatal: "`scanMinuteSecond` only accepts `TomlTime' or `string` or `TomlVoid`".} var next = lex.next line = lex.line if next != ':': lex.raiseExpectChar(':') when T is string: value.add next template num: untyped = value elif T is TomlVoid: template num: untyped = value else: var num: int scanStrictNum(lex, num, minVal = 0, maxVal = 59, count = 2, "number out of range for `minutes`") when T is TomlTime: value.minute = num next = peek if next != ':': if TomlHourMinute in lex.flags: return else: lex.raiseExpectChar(':') if lex.line != line: raiseTomlErr(lex, errDateTimeML) advance when T is string: value.add next # Parse the second. 
Note that seconds=60 *can* happen (leap second) scanStrictNum(lex, num, minVal = 0, maxVal = 60, count = 2, "number out of range for `seconds`") when T is TomlTime: value.second = num next = peek if next == '.': if lex.line != line: raiseTomlErr(lex, errDateTimeML) when T is string: value.add next next = advancePeek if next notin strutils.Digits: raiseIllegalChar(lex, next) # Toml spec says additional subsecond precision # should be truncated and not rounded when T is (string or TomlVoid): discard scanDigits(lex, value, base10, subsecondPrecision) elif T is TomlTime: discard scanDigits(lex, value.subsecond, base10, subsecondPrecision) proc scanTime*[T](lex: var TomlLexer, value: var T) = var line = lex.line when T is (string or TomlVoid): template num: untyped = value else: var num : int scanStrictNum(lex, num, minVal = 0, maxVal = 23, count = 2, "number out of range for `hours`") if lex.line != line: raiseTomlErr(lex, errDateTimeML) when T is TomlTime: value.hour = num scanMinuteSecond(lex, value) proc scanMonthDay*[T](lex: var TomlLexer, value: var T) = ## `scanMonthDay` assume the four digits of year already parsed when T isnot (TomlDate or string or TomlVoid): {.fatal: "`scanMonthDay` only accepts `TomlDate' or string or `TomlVoid`".} var next = lex.next line = lex.line if next != '-': lex.raiseExpectChar('-') when T is string: value.add next template num: untyped = value elif T is TomlVoid: template num: untyped = value else: var num : int scanStrictNum(lex, num, minVal = 1, maxVal = 12, count = 2, "number out of range for `month`") if lex.line != line: raiseTomlErr(lex, errDateTimeML) next = lex.next if next != '-': lex.raiseExpectChar('-') when T is string: value.add next elif T is TomlDate: value.month = num scanStrictNum(lex, num, minVal = 1, maxVal = 31, count = 2, "number out of range for `day`") when T is TomlDate: value.day = num proc scanDate*[T](lex: var TomlLexer, value: var T) = var line = lex.line when T is (string or TomlVoid): template num: 
untyped = value else: var num : int scanStrictNum(lex, num, minVal = 0, maxVal = 9999, count = 4, "number out of range for `year`") if lex.line != line: raiseTomlErr(lex, errDateTimeML) when T is (string or TomlVoid): scanMonthDay(lex, value) else: value.year = num scanMonthDay(lex, value) proc scanTimeZone*[T](lex: var TomlLexer, value: var T): bool = ## `scanTimeZone` assume the four digits of year already parsed when T isnot (TomlTimeZone or string or TomlVoid): {.fatal: "`scanTimeZone` only accepts `TomlTimeZone' or string or `TomlVoid`".} var line = lex.line next = peek when T is (string or TomlVoid): template num: untyped = value else: var num : int case next of 'z', 'Z': advance when T is string: value.add next elif T is TomlTimeZone: value.positiveShift = true value.hourShift = 0 value.minuteShift = 0 result = true of '-', '+': advance when T is string: value.add next elif T is TomlTimeZone: value.positiveShift = next == '+' scanStrictNum(lex, num, minVal = 0, maxVal = 23, count = 2, "number out of range for `zone hours`") if lex.line != line: raiseTomlErr(lex, errDateTimeML) next = lex.next if next != ':': lex.raiseExpectChar(':') when T is string: value.add next elif T is TomlTimeZone: value.hourShift = num scanStrictNum(lex, num, minVal = 0, maxVal = 59, count = 2, "number out of range for `zone minutes`") when T is TomlTimeZone: value.minuteShift = num result = true else: discard proc scanLongDate*[T](lex: var TomlLexer, year: int, value: var T) = var line = lex.line when T is (string or TomlVoid): scanMonthDay(lex, value) else: var date = TomlDate(year: year) scanMonthDay(lex, date) value.date = some(date) if lex.line != line: # only date part without time return let delim = peek if delim notin {'t', 'T', ' '}: return advance when T is string: value.add delim scanTime(lex, value) elif T is TomlVoid: scanTime(lex, value) else: var time: TomlTime scanTime(lex, time) value.time = some(time) when T is (string or TomlVoid): discard scanTimeZone(lex, value) 
else: var zone: TomlTimeZone if scanTimeZone(lex, zone): value.zone = some(zone) proc scanDateTime*[T](lex: var TomlLexer, value: var T, zeroLead = false) = when T isnot (TomlDateTime or string or TomlVoid): {.fatal: "`scanDateTime` only accepts `TomlDateTime' or string or `TomlVoid`".} var line = lex.line when T is (string or TomlVoid): let numDigit = scanDigits(lex, value, base10) else: var num: int let numDigit = scanDigits(lex, num, base10) let Z = zeroLead.int if numDigit == 4 - Z: if lex.line != line: raiseTomlErr(lex, errDateTimeML) when T is (string or TomlVoid): scanLongDate(lex, 0, value) else: scanLongDate(lex, num, value) elif numDigit == 2 - Z: if lex.line != line: raiseTomlErr(lex, errDateTimeML) when T is string: let num = (value[^2].int - '0'.int) * 10 + (value[^1].int - '0'.int) when T isnot TomlVoid: if num > 23: lex.raiseBadValue("number out of range for `hours`", num) when T is (string or TomlVoid): scanMinuteSecond(lex, value) else: var time = TomlTime(hour: num) scanMinuteSecond(lex, time) value.time = some(time) else: raiseTomlErr(lex, errInvalidDateTime) proc parseNumOrDate*[T](lex: var TomlLexer, value: var T) = when T isnot (TomlValueRef or string or TomlVoid): {.fatal: "`parseNumOrDate` only accepts `TomlValueRef' or string or `TomlVoid`".} var next: char sign = Sign.None when T is TomlValueRef: var uintVal: uint64 while true: next = peek case next: of '0': next = advancePeek if sign == Sign.None: if next in {'b', 'x', 'o'}: when T is (string or TomlVoid): discard scanEncoding(lex, value) else: value = TomlValueRef(kind: TomlKind.Int) discard scanEncoding(lex, uintVal) value.intVal = uintVal.int else: # This must now be a float or a date/time, or a sole 0 case next: of '.': advance when T is string: value.add '0' addFrac(lex, value, sign) elif T is TomlVoid: addFrac(lex, value, sign) else: value = TomlValueRef(kind: TomlKind.Float) addFrac(lex, value.floatVal, sign) return of strutils.Whitespace: when T is string: value.add '0' elif T is 
TomlValueRef: value = TomlValueRef(kind: TomlKind.Int, intVal: 0) return of strutils.Digits: # This must now be a date/time when T is string: value.add '0' scanDateTime(lex, value, zeroLead = true) elif T is TomlVoid: scanDateTime(lex, value, zeroLead = true) else: value = TomlValueRef(kind: TomlKind.DateTime) scanDateTime(lex, value.dateTime, zeroLead = true) return of 'e', 'E': advance when T is string: value.add '0' value.add next scanExponent(lex, value) elif T is TomlVoid: scanExponent(lex, value) else: value = TomlValueRef(kind: TomlKind.Float, floatVal: 0'f64) scanExponent(lex, value.floatVal) return else: # else is a sole 0 when T is string: value.add '0' elif T is TomlValueRef: value = TomlValueRef(kind: TomlKind.Int, intVal: 0) return else: # This must now be a float, or a sole 0 case next: of '.': advance when T is string: value.add '0' addFrac(lex, value, sign) elif T is TomlVoid: addFrac(lex, value, sign) else: value = TomlValueRef(kind: TomlKind.Float) if sign == Sign.Neg: value.floatVal = -value.floatVal addFrac(lex, value.floatVal, sign) return of strutils.Whitespace: when T is string: value.add '0' elif T is TomlValueRef: value = TomlValueRef(kind: TomlKind.Int, intVal: 0) return of 'e', 'E': advance when T is string: value.add '0' value.add next scanExponent(lex, value) elif T is TomlVoid: scanExponent(lex, value) else: value = TomlValueRef(kind: TomlKind.Float, floatVal: 0'f64) if sign == Sign.Neg: value.floatVal = -value.floatVal scanExponent(lex, value.floatVal) return else: # else is a sole 0 when T is string: value.add '0' elif T is TomlValueRef: value = TomlValueRef(kind: TomlKind.Int, intVal: 0) return of strutils.Digits - {'0'}: advance # This might be a date/time, or an int or a float var digits = 1 wasUnderscore = false when T is string: value.add next elif T is TomlValueRef: var curSum = int64(next) - int64('0') while true: next = peek if wasUnderscore and next notin strutils.Digits: raiseTomlErr(lex, errUnderscoreDigit) case next: of 
':': if digits != 2: raiseTomlErr(lex, errInvalidDateTime) when T is (string or TomlVoid): scanMinuteSecond(lex, value) else: value = TomlValueRef(kind: TomlKind.DateTime) var time = TomlTime(hour: curSum.int) scanMinuteSecond(lex, time) value.dateTime.time = some(time) return of '-': if digits != 4: raiseTomlErr(lex, errInvalidDateTime) when T is (string or TomlVoid): scanLongDate(lex, 0, value) else: value = TomlValueRef(kind: TomlKind.DateTime) scanLongDate(lex, curSum.int, value.dateTime) return of '.': advance when T is (string or TomlVoid): addFrac(lex, value, sign) else: value = TomlValueRef(kind: TomlKind.Float, floatVal: float64(curSum)) if sign == Sign.Neg: value.floatVal = -value.floatVal addFrac(lex, value.floatVal, sign) return of 'e', 'E': advance when T is string: value.add next scanExponent(lex, value) elif T is TomlVoid: scanExponent(lex, value) else: value = TomlValueRef(kind: TomlKind.Float, floatVal: float64(curSum)) scanExponent(lex, value.floatVal) if sign == Sign.Neg: value.floatVal = -value.floatVal return of strutils.Digits: advance when T is string: value.add next inc digits elif T is TomlVoid: inc digits else: try: curSum = curSum * 10'i64 + int64(next) - int64('0') inc digits except OverflowError: raiseTomlErr(lex, errIntegerOverflow) wasUnderscore = false continue of '_': advance wasUnderscore = true continue of strutils.Whitespace: when T is TomlValueRef: value = TomlValueRef( kind: TomlKind.Int, intVal: if sign == Neg: -curSum else: curSum ) return else: when T is TomlValueRef: value = TomlValueRef( kind: TomlKind.Int, intVal: if sign == Neg: -curSum else: curSum ) return break of '+': advance sign = Sign.Pos when T is string: value.add '+' continue of '-': advance sign = Sign.Neg when T is string: value.add '-' continue of 'i': advance let li = lex.lineInfo if lex.next != 'n' or lex.next != 'f': raiseTomlErr(li, errUnknownIdent) when T is string: value.add "inf" elif T is TomlValueRef: value = TomlValueRef(kind: TomlKind.Float, 
floatVal: Inf) return of 'n': advance let li = lex.lineInfo if lex.next != 'a' or lex.next != 'n': raiseTomlErr(li, errUnknownIdent) when T is string: value.add "nan" elif T is TomlValueRef: value = TomlValueRef(kind: TomlKind.Float, floatVal: NaN) return else: raiseIllegalChar(lex, next) break

# Forward declaration: `parseArray` and `parseInlineTable` recurse into
# `parseValue`, which is defined after them.
proc parseValue*[T](lex: var TomlLexer, value: var T)

proc parseArray[T](lex: var TomlLexer, value: var T) =
  ## Parses array elements up to and including the closing ']'.
  ##
  ## This procedure assumes that "lex" has already consumed the '['
  ## character.
  ##
  ## Depending on `T` the elements are appended to a `seq[TomlValueRef]`,
  ## echoed into a `string`, or parsed and discarded (`TomlVoid`).
  ##
  ## Raises `errUnterminatedArray` when EOF is reached before ']' and
  ## `errMissingFirstElement` when a ',' appears before any element.
  when T isnot (seq[TomlValueRef] or string or TomlVoid):
    {.fatal: "`parseArray` only accepts `TomlValueRef' or string or `TomlVoid`".}

  var numElem = 0
  while true:
    var next = lex.nonws(skipLf)
    case next
    of ']':
      advance
      when T is string:
        value.add next
      return
    of EOF:
      raiseTomlErr(lex, errUnterminatedArray)
    of ',':
      advance
      if numElem == 0:
        # This happens with "[, 1, 2]", for instance
        raiseTomlErr(lex, errMissingFirstElement)

      # Check that this is not a terminating comma (like in
      # "[b,]")
      next = lex.nonws(skipLf)
      if next == ']':
        advance
        when T is string:
          value.add next
        return
      else:
        when T is string:
          value.add ','
    else:
      when T is (string or TomlVoid):
        parseValue(lex, value)
      else:
        # Grow the sequence by one slot and parse directly into it
        value.setLen(numElem + 1)
        parseValue(lex, value[numElem])
      inc numElem

proc parseInlineTable[T](lex: var TomlLexer, value: var T) =
  ## Parses the `key = value` pairs of an inline table up to and including
  ## the closing '}'.
  ##
  ## This procedure assumes that "lex" has already consumed the '{'
  ## character.
  ##
  ## When `T` is `TomlTableRef`, a dotted key such as `a.b.c = 1` produces
  ## nested inline tables: every key part except the last names an
  ## intermediate table and the last part receives the value.
  ##
  ## Raises `errUnterminatedTable` on EOF, `errMissingFirstElement` for a
  ## ',' before any pair, `errRequireKey` when no key could be scanned, and
  ## an illegal-character error for a trailing comma before '}' or for a
  ## newline when `TomlInlineTableNewline` is not in `lex.flags`.
  when T isnot (TomlTableRef or string or TomlVoid):
    {.fatal: "`parseInlineTable` only accepts `TomlTableRef' or string or `TomlVoid`".}

  var firstComma = true
  while true:
    var next = lex.nonws(skipNoLf)
    case next
    of '}':
      advance
      when T is string:
        value.add next
      return
    of EOF:
      raiseTomlErr(lex, errUnterminatedTable)
    of ',':
      advance
      if firstComma:
        raiseTomlErr(lex, errMissingFirstElement)

      when T is string:
        value.add ','

      # A comma directly followed by '}' (trailing comma) is rejected
      next = lex.nonws(skipNoLf)
      if next == '}':
        raiseIllegalChar(lex, '}')
    of '\n':
      if TomlInlineTableNewline in lex.flags:
        advance
        continue
      else:
        raiseIllegalChar(lex, next)
    else:
      firstComma = false
      when T is (string or TomlVoid):
        scanKey(lex, value)
      else:
        var keys: seq[string]
        scanKey(lex, keys)
        if keys.len == 0:
          raiseTomlErr(lex, errRequireKey)

      next = lex.nonws(skipNoLf)
      if next != '=':
        raiseExpectChar(lex, '=')

      advance
      when T is string:
        value.add next
        parseValue(lex, value)
      elif T is TomlVoid:
        parseValue(lex, value)
      else:
        var curTable = value
        # Every key part except the last one names an intermediate table.
        # (Previously the loop indexed keys[1 .. ^1], which dropped the
        # first part and nested the last part inside itself.)
        # NOTE: an existing entry with the same name is replaced.
        for i in 0 ..< keys.high:
          let deepestTable = TomlTableRef.new
          curTable[keys[i]] = TomlValueRef(
            kind: TomlKind.InlineTable,
            tableVal: deepestTable
          )
          curTable = deepestTable

        var val: TomlValueRef
        parseValue(lex, val)
        curTable[keys[^1]] = val

proc parseValue[T](lex: var TomlLexer, value: var T) =
  ## Parses a single TOML value, dispatching on its first non-whitespace
  ## character: numbers / dates / `inf` / `nan`, booleans, basic and literal
  ## strings, arrays and inline tables.
  ##
  ## Depending on `T` the value is built as a `TomlValueRef`, echoed into a
  ## `string`, or parsed and discarded (`TomlVoid`).
  ##
  ## Raises an illegal-character error when the first character cannot
  ## start any value.
  when T isnot (TomlValueRef or string or TomlVoid):
    {.fatal: "`parseValue` only accepts `TomlValueRef' or string or `TomlVoid`".}

  var next = lex.nonws(skipNoLf)
  case next
  of strutils.Digits, '+', '-', 'i', 'n':
    parseNumOrDate(lex, value)
  of 't', 'f':
    when T is string:
      let val = lex.scanBool
      value.add if val: "true" else: "false"
    elif T is TomlVoid:
      discard lex.scanBool
    else:
      let val = lex.scanBool
      value = TomlValueRef(kind: TomlKind.Bool, boolVal: val)
  of '\"':
    advance
    when T is (string or TomlVoid):
      discard scanString(lex, value, StringType.Basic)
    else:
      value = TomlValueRef(kind: TomlKind.String)
      discard scanString(lex, value.stringVal, StringType.Basic)
  of '\'':
    advance
    when T is (string or TomlVoid):
      discard scanString(lex, value, StringType.Literal)
    else:
      value = TomlValueRef(kind: TomlKind.String)
      discard scanString(lex, value.stringVal, StringType.Literal)
  of '[':
    advance
    # An array
    when T is string:
      value.add next
      parseArray(lex, value)
    elif T is TomlVoid:
      parseArray(lex, value)
    else:
      value = TomlValueRef(kind: TomlKind.Array)
      parseArray(lex, value.arrayVal)
  of '{':
    advance
    # An inline table
    when T is string:
      value.add next
      parseInlineTable(lex, value)
    elif T is TomlVoid:
      parseInlineTable(lex, value)
    else:
      value = TomlValueRef(kind: TomlKind.InlineTable, tableVal: TomlTableRef.new)
      parseInlineTable(lex, value.tableVal)
  else:
    raiseIllegalChar(lex, next)

proc newTableArray(size: int = 0): TomlValueRef =
  ## Creates a `TomlKind.Tables` value whose `tablesVal` sequence holds
  ## `size` default-initialised entries.
  TomlValueRef(
    kind: TomlKind.Tables,
    tablesVal: newSeq[TomlTableRef](size)
  )

proc advanceToNextNestLevel(lex: var TomlLexer,
                            curTable: var TomlTableRef,
                            name: string) =
  ## Moves `curTable` into the existing entry `name`: into the table itself
  ## for a `Table`, or into the last element for a `Tables` array.
  ## Any other kind raises via `raiseNotTable`.
  var node = curTable[name]
  case node.kind
  of TomlKind.Table:
    curTable = node.tableVal
  of TomlKind.Tables:
    curTable = node.tablesVal[^1]
  else:
    raiseNotTable(lex, name)

proc createOrAppendTableArray(lex: var TomlLexer,
                              curTable: var TomlTableRef,
                              names: seq[string]) =
  ## Handles a table array entry (e.g. "[[entry]]").
  ##
  ## Walks `names` starting from `curTable`, creating missing intermediate
  ## tables. For the last name a new table is appended to the existing
  ## array of tables, or a new one-element array is created. On return
  ## `curTable` refers to the newly added table.
  ##
  ## Raises via `raiseNotArray` when the last name already exists but is
  ## not an array of tables.
  for idx, name in names:
    let lastTable = idx == names.high

    curTable[].withValue(name, node) do:
      # The element exists: is it of the right type?
      if lastTable:
        if node[].kind != TomlKind.Tables:
          raiseNotArray(lex, name)

        var newTable = TomlTableRef.new
        node[].tablesVal.add(newTable)
        curTable = newTable
      else:
        advanceToNextNestLevel(lex, curTable, name)
    do:
      # If this is the last name in the chain (e.g.,
      # "c" in "a.b.c"), its value should be an
      # array of tables, otherwise just a table
      if lastTable:
        var newValue = newTableArray(1)
        var newTable = TomlTableRef.new
        newValue.tablesVal[0] = newTable
        curTable[name] = newValue
        curTable = newTable
      else:
        var newValue = emptyTable()
        # Add the newly created object to the current table
        curTable[name] = newValue
        # Update the pointer to the current table
        curTable = newValue.tableVal

proc createTable(lex: var TomlLexer,
                 curTable: var TomlTableRef,
                 names: openArray[string],
                 dotted = false) =
  ## Starts a new table (e.g. "[table]"), or walks a dotted key prefix when
  ## `dotted` is true.
  ##
  ## Walks `names` starting from `curTable`, creating missing tables and
  ## descending into existing ones. On return `curTable` refers to the
  ## table named by the last element.
  ##
  ## When the last name already exists as a `Table`, a duplicate-key error
  ## is raised if that table is empty, or (only when `dotted` is false) if
  ## any of its values is not itself a `Table`.
  for i, name in names:
    curTable[].withValue(name, val) do:
      if i == names.high and val[].kind == TomlKind.Table:
        if val[].tableVal.len == 0:
          raiseDuplicateTableKey(lex, name)
        elif not dotted:
          for value in val[].tableVal.values:
            if value.kind != TomlKind.Table:
              raiseDuplicateTableKey(lex, name)
      advanceToNextNestLevel(lex, curTable, name)
    do:
      # Add the newly created object to the current table
      var newVal = emptyTable()
      curTable[name] = newVal
      # Update the pointer to the current table
      curTable = newVal.tableVal

proc checkEol*(lex: var TomlLexer, line: int) =
  ## Verifies that the next non-whitespace character is not on line `line`.
  ##
  ## A new key/value must start on the next line, so an illegal-character
  ## error is raised when the lexer is still on `line` and has not reached
  ## EOF.
  let next = lex.nonws(skipLf)
  if next != EOF:
    if lex.line == line:
      raiseIllegalChar(lex, next)

proc parseKeyValue(lex: var TomlLexer, curTable: var TomlTableRef) =
  ## Parses one `key = value` line and stores it in `curTable`.
  ##
  ## All parts of a dotted key except the last are created or traversed via
  ## `createTable(..., dotted = true)`; the last part receives the value.
  ## `curTable` is restored to its original table before returning.
  ##
  ## Raises a duplicate-key error when the final key already exists and an
  ## expect-char error when the key is not followed by '='.
  var pushTable = curTable
  var keys: seq[string]
  let line = lex.line
  scanKey(lex, keys)

  let key = keys.pop
  createTable(lex, curTable, keys, dotted = true)

  if curTable.hasKey(key):
    raiseDuplicateTableKey(lex, key)

  var next = lex.next
  if next != '=':
    raiseExpectChar(lex, '=')

  var newValue: TomlValueRef
  parseValue(lex, newValue)
  curTable[key] = newValue
  # Dotted keys must not change the table that following lines belong to
  curTable = pushTable
  checkEol(lex, line)

proc parseToml*(lex: var TomlLexer): TomlValueRef =
  ## Parses a whole TOML document from `lex` and returns its root table.
  ##
  ## "[name]" headers start a table and "[[name]]" headers append to an
  ## array of tables, both resolved from the root; any other content is
  ## parsed as `key = value` into the current table.
  ##
  ## Raises `errKeyNameMissing` for a line starting with '=' and an
  ## illegal-character error for a line starting with '#', '.' or ']'.
  result = emptyTable()

  var
    next: char
    curTable = result.tableVal

  while true:
    next = lex.nonws(skipLf)
    case next
    of '[':
      advance
      var names: seq[string]
      let bracket = scanTableName(lex, names)
      # Table headers are always resolved from the root table
      curTable = result.tableVal
      if bracket == BracketType.double:
        createOrAppendTableArray(lex, curTable, names)
      else:
        createTable(lex, curTable, names)
    of '=':
      raiseTomlErr(lex, errKeyNameMissing)
    of '#', '.', ']':
      raiseIllegalChar(lex, next)
    of EOF:
      break
    else:
      # Everything else marks the presence of a "key = value" pattern
      parseKeyValue(lex, curTable)

proc parseKeyValue*(lex: var TomlLexer,
                    names, key: openArray[string],
                    tomlCase: TomlCase): bool =
  ## Scans one `key = value` line while searching for `key`.
  ##
  ## The scanned key parts are appended to `names` and compared with `key`
  ## using `compare` and `tomlCase`. On a match `true` is returned right
  ## after the '=' has been read, leaving the value unparsed. Otherwise the
  ## value is parsed and discarded, the end of line is checked, and `false`
  ## is returned.
  let line = lex.line
  var keys: seq[string]
  scanKey(lex, keys)

  let curKey = @names & keys

  var next = lex.next
  if next != '=':
    raiseExpectChar(lex, '=')

  if compare(curKey, key, tomlCase):
    return true

  var skipValue: TomlVoid
  parseValue(lex, skipValue)
  checkEol(lex, line)

proc parseKey*(key: string, tomlCase: TomlCase): seq[string] =
  ## Splits the textual key `key` into its parts by running `scanKey` over
  ## it with a temporary lexer. `tomlCase` is not used here.
  var stream = unsafeMemoryInput(key)
  var lex = init(TomlLexer, stream)
  lex.scanKey(result)

proc parseToml*(lex: var TomlLexer, key: string, tomlCase: TomlCase): CodecState =
  ## move cursor to key position
  ##
  ## Returns `InsideRecord` when a "[table]" header matches `key` and
  ## `ExpectValue` when a `key = value` line matches it.
  ##
  ## Raises `errDoubleBracket` on a "[[...]]" header, `errKeyNameMissing`
  ## for a line starting with '=', an illegal-character error for a line
  ## starting with '#', '.' or ']', and a key-not-found error when EOF is
  ## reached without a match.
  var
    next: char
    names: seq[string]
    keyList = parseKey(key, tomlCase)
    found = false

  while true:
    next = lex.nonws(skipLf)
    case next
    of '[':
      advance
      names.setLen(0)
      let bracket = scanTableName(lex, names)
      if bracket == BracketType.double:
        raiseTomlErr(lex, errDoubleBracket)

      if compare(keyList, names, tomlCase):
        found = true
        result = InsideRecord
        break

    of '=':
      raiseTomlErr(lex, errKeyNameMissing)
    of '#', '.', ']':
      raiseIllegalChar(lex, next)
    of EOF:
      break
    else:
      # Everything else marks the presence of a "key = value" pattern
      if parseKeyValue(lex, names, keyList, tomlCase):
        found = true
        result = ExpectValue
        break

  if not found:
    raiseKeyNotFound(lex, key)