Adapt type hinting to YAML 1.2.2 (#132)

Updated type hinting for YAML 1.2.

 * previously, outdated regexes from YAML 1.1 were used.
 * removed special cases for leading zeros that are not part of YAML 1.2.
 * updated literal regexes for infinity, NaN and boolean values.
 * added tests for type hinting

Co-authored-by: theamarin <theamarin@thecreation.de>
This commit is contained in:
theamarin 2023-04-10 17:17:03 +02:00 committed by GitHub
parent 4bd8216bb1
commit ee7dc11a90
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 316 additions and 90 deletions

View File

@ -38,6 +38,11 @@ task quickstartTests, "Run quickstart tests":
--verbosity:0
setCommand "c", "test/tquickstart"
task hintsTests, "Run hints tests":
  # Compile test/thints with the C backend and run the resulting binary
  # immediately, with compiler verbosity turned down to 0.
  switch("r")
  switch("verbosity", "0")
  setCommand("c", "test/thints")
task documentation, "Generate documentation":
exec "mkdir -p docout"
withDir "doc":

View File

@ -5,7 +5,7 @@
# distribution, for details about the copyright.
{.warning[UnusedImport]: off.}
import tlex, tjson, tserialization, tparser, tquickstart, tannotations
import tlex, tjson, tserialization, tparser, tquickstart, tannotations, thints
when not defined(gcArc) or defined(gcOrc):
import tdom
import tdom

262
test/thints.nim Normal file
View File

@ -0,0 +1,262 @@
import unittest
import ../yaml/hints
const
  signs = ["", "+", "-"]
    ## Optional sign prefixes allowed by the numeric regexes.
  anyBool = {yTypeBoolTrue, yTypeBoolFalse}
    ## Both boolean hints; used to assert a scalar is not a boolean at all.

template expectHint(scalar: string, expected: TypeHint) =
  ## Marks the running test as failed, naming the offending scalar, unless
  ## `guessType(scalar)` yields exactly `expected`. Unlike a bare `assert`,
  ## this is not compiled out when assertions are disabled and it does not
  ## abort the remaining cases of the test.
  let input = scalar
  let guessed = guessType(input)
  if guessed != expected:
    checkpoint "guessType(\"" & input & "\") was " & $guessed &
        ", expected " & $expected
    fail()

template rejectHint(scalar: string, rejected: set[TypeHint]) =
  ## Marks the running test as failed, naming the offending scalar, if
  ## `guessType(scalar)` yields any member of `rejected`.
  let input = scalar
  let guessed = guessType(input)
  if guessed in rejected:
    checkpoint "guessType(\"" & input & "\") was unexpectedly " & $guessed
    fail()

suite "Hints":
  test "Int":
    # [-+]? [0-9]+
    for scalar in ["0", "01", "10", "248", "-4248", "+42489"]:
      expectHint(scalar, yTypeInteger)

  test "Non-Int":
    for scalar in ["0+0", "0-1", "1x0", "248e", "-4248 4", "+-4248"]:
      rejectHint(scalar, {yTypeInteger})

  test "Float":
    # [-+]? ( \. [0-9]+ | [0-9]+ ( \. [0-9]* )? ) ( [eE] [-+]? [0-9]+ )?
    # Batch: [-+]? ( \. [0-9]+ | [0-9]+ ( \. [0-9]* )? )
    for mantissa in [".5", "0.5", "5.5", "5."]:
      for sign in signs:
        expectHint(sign & mantissa, yTypeFloat)
    # Batches with an exponent, in this order:
    #   [-+]? \. [0-9]+ [eE] [-+]? [0-9]+
    #   [-+]? [0-9]+ \. [0-9]* [eE] [-+]? [0-9]+
    #   [-+]? [0-9]+ [eE] [-+]? [0-9]+
    # Every mantissa is combined with every sign and every exponent form
    # (unsigned / signed, with and without a leading zero).
    for mantissa in [".5", ".05", "0.5", "0.05", "5", "05"]:
      for exponent in ["e5", "e+5", "e-5", "e05", "e+05", "e-05"]:
        for sign in signs:
          expectHint(sign & mantissa & exponent, yTypeFloat)

  test "Non-Float":
    for scalar in [".", "+.", "-.", ".e4", "+.e4", "-.e4"]:
      rejectHint(scalar, {yTypeFloat})

  test "Bool-True":
    # ``true | True | TRUE``
    for scalar in ["true", "True", "TRUE"]:
      expectHint(scalar, yTypeBoolTrue)

  test "Bool-False":
    # ``false | False | FALSE``
    for scalar in ["false", "False", "FALSE"]:
      expectHint(scalar, yTypeBoolFalse)

  test "Non-Bool":
    # y, yes, on should not be treated as bool
    for scalar in ["y", "Y", "yes", "Yes", "on", "On"]:
      rejectHint(scalar, anyBool)
    # n, no, off should not be treated as bool (neither true nor false)
    for scalar in ["n", "N", "no", "No", "off", "Off"]:
      rejectHint(scalar, anyBool)
    # mis-cased words should not be treated as bool
    for scalar in ["tRUE", "TRue", "fAlse", "FALSe"]:
      rejectHint(scalar, anyBool)
    # misspelled words should not be treated as bool
    for scalar in ["ye", "yse", "nO", "flase"]:
      rejectHint(scalar, anyBool)

  test "Inf":
    # ``[-+]? ( \.inf | \.Inf | \.INF )``
    for sign in signs:
      for literal in [".inf", ".Inf", ".INF"]:
        expectHint(sign & literal, yTypeFloatInf)

  test "Non-Inf":
    for scalar in [".InF", ".INf", "inf", "Inf", "INF"]:
      rejectHint(scalar, {yTypeFloatInf})

  test "NaN":
    # ``\.nan | \.NaN | \.NAN``
    for scalar in [".nan", ".NaN", ".NAN"]:
      expectHint(scalar, yTypeFloatNaN)

  test "Non-NaN":
    for scalar in [".nAn", ".Nan", ".nAN", "nan", "NaN", "NAN"]:
      rejectHint(scalar, {yTypeFloatNaN})

  test "Null":
    # ``null | Null | NULL | ~``
    for scalar in ["null", "Null", "NULL", "~"]:
      expectHint(scalar, yTypeNull)

  test "Non-Null":
    for scalar in ["NuLL", "NUll", "NULl", "nULL", "~~",
                   ".null", ".Null", ".NULL", ".~"]:
      rejectHint(scalar, {yTypeNull})

View File

@ -23,16 +23,18 @@ type
## You can use it to determine the type of YAML scalars that have a '?'
## non-specific tag, but using this feature is completely optional.
##
## See also: https://yaml.org/spec/1.2.2/#103-core-schema
##
## ================== =========================
## Name RegEx
## ================== =========================
## ``yTypeInteger`` ``0 | -? [1-9] [0-9]*``
## ``yTypeFloat`` ``-? [1-9] ( \. [0-9]* [1-9] )? ( e [-+] [1-9] [0-9]* )?``
## ``yTypeFloatInf`` ``-? \. (inf | Inf | INF)``
## ``yTypeFloatNaN`` ``-? \. (nan | NaN | NAN)``
## ``yTypeBoolTrue`` ``y|Y|yes|Yes|YES|true|True|TRUE|on|On|ON``
## ``yTypeBoolFalse`` ``n|N|no|No|NO|false|False|FALSE|off|Off|OFF``
## ``yTypeNull`` ``~ | null | Null | NULL``
## ``yTypeInteger`` ``[-+]? [0-9]+``
## ``yTypeFloat`` ``[-+]? ( \. [0-9]+ | [0-9]+ ( \. [0-9]* )? ) ( [eE] [-+]? [0-9]+ )?``
## ``yTypeFloatInf`` ``[-+]? ( \.inf | \.Inf | \.INF )``
## ``yTypeFloatNaN`` ``\.nan | \.NaN | \.NAN``
## ``yTypeBoolTrue`` ``true | True | TRUE``
## ``yTypeBoolFalse`` ``false | False | FALSE``
## ``yTypeNull`` ``null | Null | NULL | ~``
## ``yTypeTimestamp`` see `here <http://yaml.org/type/timestamp.html>`_.
## ``yTypeUnknown`` ``*``
## ================== =========================
@ -43,25 +45,19 @@ type
ythInitial,
ythF, ythFA, ythFAL, ythFALS, ythFALSE,
ythN, ythNU, ythNUL, ythNULL,
ythNO,
ythO, ythON,
ythOF, ythOFF,
ythT, ythTR, ythTRU, ythTRUE,
ythY, ythYE, ythYES,
ythPoint, ythPointI, ythPointIN, ythPointINF,
ythPointN, ythPointNA, ythPointNAN,
ythLowerFA, ythLowerFAL, ythLowerFALS,
ythLowerNU, ythLowerNUL,
ythLowerOF,
ythLowerTR, ythLowerTRU,
ythLowerYE,
ythLowerF, ythLowerFA, ythLowerFAL, ythLowerFALS,
ythLowerN, ythLowerNU, ythLowerNUL,
ythLowerT, ythLowerTR, ythLowerTRU,
ythPointLowerIN, ythPointLowerN, ythPointLowerNA,
ythPointLowerI, ythPointLowerIN,
ythPointLowerN, ythPointLowerNA,
ythMinus, yth0, ythInt1, ythInt1Zero, ythInt2, ythInt2Zero, ythInt3,
ythInt3Zero, ythInt4, ythInt4Zero, ythInt,
ythMinus, ythPlus, ythInt1, ythInt2, ythInt3, ythInt4, ythInt,
ythDecimal, ythNumE, ythNumEPlusMinus, ythExponent,
ythYearMinus, ythMonth1, ythMonth2, ythMonthMinus, ythMonthMinusNoYmd,
@ -111,63 +107,33 @@ template advanceTypeHint(ch: char) {.dirty.} =
typeHintStateMachine ch:
of '~': ythInitial => ythNULL
of '.':
[yth0, ythInt1Zero, ythInt1, ythInt2, ythInt3, ythInt4, ythInt] => ythDecimal
[ythInitial, ythMinus] => ythPoint
ythSecond2 => ythFraction
[ythInt1, ythInt2, ythInt3, ythInt4, ythInt] => ythDecimal
[ythInitial, ythMinus, ythPlus] => ythPoint
ythSecond2 => ythFraction
of '+':
ythNumE => ythNumEPlusMinus
ythInitial => ythPlus
ythNumE => ythNumEPlusMinus
[ythFraction, ythSecond2] => ythAfterTimePlusMinus
of '-':
ythInitial => ythMinus
ythNumE => ythNumEPlusMinus
[ythInt4, ythInt4Zero] => ythYearMinus
ythInt4 => ythYearMinus
ythMonth1 => ythMonthMinusNoYmd
ythMonth2 => ythMonthMinus
[ythFraction, ythSecond2] => ythAfterTimePlusMinus
of '_':
[ythInt1, ythInt2, ythInt3, ythInt4] => ythInt
[ythInt, ythDecimal] => nil
[ythInt, ythDecimal] => nil
of ':':
[ythHour1, ythHour2] => ythHourColon
ythMinute2 => ythMinuteColon
[ythTzHour1, ythTzHour2] => ythTzHourColon
of '0':
ythInitial => ythInt1Zero
ythMinus => yth0
[ythNumE, ythNumEPlusMinus] => ythExponent
ythInt1 => ythInt2
ythInt1Zero => ythInt2Zero
ythInt2 => ythInt3
ythInt2Zero => ythInt3Zero
ythInt3 => ythInt4
ythInt3Zero => ythInt4Zero
ythInt4 => ythInt
ythYearMinus => ythMonth1
ythMonth1 => ythMonth2
ythMonthMinus => ythDay1
ythMonthMinusNoYmd => ythDay1NoYmd
ythDay1 => ythDay2
ythDay1NoYmd => ythDay2NoYmd
[ythAfterDaySpace, ythAfterDayT] => ythHour1
ythHour1 => ythHour2
ythHourColon => ythMinute1
ythMinute1 => ythMinute2
ythMinuteColon => ythSecond1
ythSecond1 => ythSecond2
ythAfterTimePlusMinus => ythTzHour1
ythTzHour1 => ythTzHour2
ythTzHourColon => ythTzMinute1
ythTzMinute1 => ythTzMinute2
[ythInt, ythDecimal, ythExponent, ythFraction] => nil
of '1'..'9':
of '0'..'9':
ythInitial => ythInt1
ythInt1 => ythInt2
ythInt1Zero => ythInt2Zero
ythInt2 => ythInt3
ythInt2Zero => ythInt3Zero
ythInt3 => ythInt4
ythInt3Zero => ythInt4Zero
[ythInt4, ythMinus] => ythInt
[ythInt4, ythMinus, ythPlus] => ythInt
[ythNumE, ythNumEPlusMinus] => ythExponent
ythYearMinus => ythMonth1
ythMonth1 => ythMonth2
@ -185,35 +151,33 @@ template advanceTypeHint(ch: char) {.dirty.} =
ythTzHour1 => ythTzHour2
ythTzHourColon => ythTzMinute1
ythTzMinute1 => ythTzMinute2
ythPoint => ythDecimal
[ythInt, ythDecimal, ythExponent, ythFraction] => nil
of 'a':
ythF => ythLowerFA
ythPointN => ythPointNA
ythPointLowerN => ythPointLowerNA
[ythF, ythLowerF] => ythLowerFA
ythPointN => ythPointNA
ythPointLowerN => ythPointLowerNA
of 'A':
ythF => ythFA
ythPointN => ythPointNA
of 'e':
[yth0, ythInt, ythDecimal] => ythNumE
[ythInt, ythDecimal,
ythInt1, ythInt2, ythInt3, ythInt4] => ythNumE
ythLowerFALS => ythFALSE
ythLowerTRU => ythTRUE
ythY => ythLowerYE
of 'E':
[yth0, ythInt, ythDecimal] => ythNumE
[ythInt, ythDecimal,
ythInt1, ythInt2, ythInt3, ythInt4] => ythNumE
ythFALS => ythFALSE
ythTRU => ythTRUE
ythY => ythYE
of 'f':
ythInitial => ythF
ythO => ythLowerOF
ythLowerOF => ythOFF
ythInitial => ythLowerF
ythPointLowerIN => ythPointINF
of 'F':
ythInitial => ythF
ythO => ythOF
ythOF => ythOFF
ythPointIN => ythPointINF
of 'i', 'I': ythPoint => ythPointI
of 'i': ythPoint => ythPointLowerI
of 'I': ythPoint => ythPointI
of 'l':
ythLowerNU => ythLowerNUL
ythLowerNUL => ythNULL
@ -223,38 +187,33 @@ template advanceTypeHint(ch: char) {.dirty.} =
ythNUL => ythNULL
ythFA => ythFAL
of 'n':
ythInitial => ythN
ythO => ythON
ythInitial => ythLowerN
ythPoint => ythPointLowerN
ythPointI => ythPointLowerIN
[ythPointI, ythPointLowerI] => ythPointLowerIN
ythPointLowerNA => ythPointNAN
of 'N':
ythInitial => ythN
ythO => ythON
ythPoint => ythPointN
ythPointI => ythPointIN
ythPointNA => ythPointNAN
of 'o', 'O':
ythInitial => ythO
ythN => ythNO
of 'r': ythT => ythLowerTR
of 'r': [ythT, ythLowerT] => ythLowerTR
of 'R': ythT => ythTR
of 's':
ythLowerFAL => ythLowerFALS
ythLowerYE => ythYES
of 'S':
ythFAL => ythFALS
ythYE => ythYES
of 't', 'T':
of 't':
ythInitial => ythLowerT
[ythDay1, ythDay2, ythDay1NoYmd, ythDay2NoYmd] => ythAfterDayT
of 'T':
ythInitial => ythT
[ythDay1, ythDay2, ythDay1NoYmd, ythDay2NoYmd] => ythAfterDayT
of 'u':
ythN => ythLowerNU
ythLowerTR => ythLowerTRU
[ythN, ythLowerN] => ythLowerNU
ythLowerTR => ythLowerTRU
of 'U':
ythN => ythNU
ythTR => ythTRU
of 'y', 'Y': ythInitial => ythY
of 'Z': [ythSecond2, ythFraction, ythAfterTimeSpace] => ythAfterTimeZ
of ' ', '\t':
[ythSecond2, ythFraction] => ythAfterTimeSpace
@ -268,9 +227,9 @@ proc guessType*(scalar: string): TypeHint {.raises: [].} =
for c in scalar: advanceTypeHint(c)
case typeHintState
of ythNULL, ythInitial: result = yTypeNull
of ythTRUE, ythON, ythYES, ythY: result = yTypeBoolTrue
of ythFALSE, ythOFF, ythNO, ythN: result = yTypeBoolFalse
of ythInt1, ythInt2, ythInt3, ythInt4, ythInt, yth0, ythInt1Zero: result = yTypeInteger
of ythTRUE: result = yTypeBoolTrue
of ythFALSE: result = yTypeBoolFalse
of ythInt1, ythInt2, ythInt3, ythInt4, ythInt: result = yTypeInteger
of ythDecimal, ythExponent: result = yTypeFloat
of ythPointINF: result = yTypeFloatInf
of ythPointNAN: result = yTypeFloatNaN