Use variant object types for heterogeneous data

* Made variant object types work (really this time)
 * Added `markAsImplicit`
 * Implemented implicit variant object types
 * Added documentation
This commit is contained in:
Felix Krause 2016-06-05 19:29:16 +02:00
parent e81da97a17
commit 48aeff20c0
4 changed files with 322 additions and 43 deletions

View File

@ -343,6 +343,72 @@ Loading Nim objects from JSON
Processing a Sequence of Heterogeneous Items
--------------------------------------------
… With variant objects
......................
.. raw:: html
<table class="quickstart-example"><thead><tr><th>code.nim</th>
<th>in.yaml</th></tr></thead><tbody><tr><td>
.. code-block:: nim
import yaml
type
Person = object
name: string
ContainerKind = enum
ckString, ckInt, ckBool, ckPerson, ckNone
Container = object
case kind: ContainerKind
of ckString:
strVal: string
of ckInt:
intVal: int
of ckBool:
boolVal: bool
of ckPerson:
personVal: Person
of ckNone:
discard
setTagUri(Person, "!nim:demo:Person")
# tell NimYAML to use Container as implicit type.
# only possible with variant object types where
# each branch contains at most one field.
markAsImplicit(Container)
var list: seq[Container]
var s = newFileStream("in.yaml")
load(s, list)
s.close()
assert(list[0].kind == ckString)
assert(list[0].strVal == "this is a string")
# and so on
.. raw:: html
</td>
<td>
.. code-block:: yaml
%YAML 1.2
---
- this is a string
- 42
- false
- !!str 23
- !nim:demo:Person {name: Trillian}
- !!null
.. raw:: html
</td></tr></tbody></table>
… With the Sequential API
.........................
.. raw:: html
<table class="quickstart-example"><thead><tr><th>code.nim</th>
<th>in.yaml</th></tr></thead><tbody><tr><td>

View File

@ -86,7 +86,8 @@ cannot be loaded to Nim collection types. For example, this sequence:
- !!string foo
Cannot be loaded to a Nim ``seq``. For this reason, you cannot load YAML's
native ``!!map`` and ``!!seq`` types directly into Nim types.
native ``!!map`` and ``!!seq`` types directly into Nim types. However, you can
use variant object types to process heterogeneous value lists, see below.
Nim ``seq`` types may be ``nil``. This is handled by serializing them to an
empty scalar with the tag ``!nim:nil:seq``.
@ -115,12 +116,81 @@ conditions must be met:
- All fields of an object type must be accessible from the code position where
you call NimYAML. If an object has non-public member fields, it can only be
processed in the module where it is defined.
- The object may not contain a ``case`` clause.
- The object may not have a generic parameter
NimYAML will present enum types as YAML scalars, and tuple and object types as
YAML maps. Some of the conditions above may be loosened in future releases.
Variant Object Types
....................
A *variant object type* is an object type that contains one or more ``case``
clauses. NimYAML currently supports variant object types. However, this feature
is **highly experimental**. Only the currently accessible fields of a variant
object type are dumped, and only those may be present when loading. The
discriminator field(s) are treated like all other fields. The value of a
discriminator field must occur before any value of a field that depends on it.
Relying on key order violates the YAML specification, which defines mappings as
unordered, and therefore this will be changed in the future.
While dumping variant object types directly is currently not production ready,
you can use them for processing heterogeneous data sets. For example, if you
have a YAML document which contains differently typed values in the same list
like this:
.. code-block:: yaml
%YAML 1.2
---
- 42
- this is a string
- !!null
You can define a variant object type that can hold all types that occur in this
list in order to load it:
.. code-block:: nim
import yaml
type
ContainerKind = enum
ckInt, ckString, ckNone
Container = object
case kind: ContainerKind
of ckInt:
intVal: int
of ckString:
strVal: string
of ckNone:
discard
markAsImplicit(Container)
var
list: seq[Container]
s = newFileStream("in.yaml")
load(s, list)
``markAsImplicit`` tells NimYAML that you want to use the type ``Container``
implicitly, i.e. its fields are not visible in YAML, and are set dependent on
the value type that gets loaded into it. The type ``Container`` must fulfill the
following requirements:
- It must contain exactly one ``case`` clause, and nothing else.
- Each branch of the ``case`` clause must contain exactly one field, with one
exception: There may be at most one branch that contains no field at all.
- It must not be a derived object type (this is currently not enforced)
When loading the sequence, NimYAML writes the value into the first field that
can hold the value's type. All complex values (i.e. non-scalar values) *must*
have a tag in the YAML source, because NimYAML would otherwise be unable to
determine their type. The type of scalar values will be guessed if no tag is
available, but be aware that ``42`` can fit in both ``int8`` and ``int16``, so
if you have fields for both types, you should annotate the value with a tag.
When dumping the sequence, NimYAML will always annotate a tag to each value it
outputs. This is to avoid possible ambiguity when loading. If a branch without
a field exists, it is represented as a ``!!null`` value.
Tags
====

View File

@ -387,14 +387,6 @@ proc representObject*[K, V](value: OrderedTable[K, V], ts: TagStyle,
yield endMapEvent()
yield endSeqEvent()
proc isVariant(t: typedesc): bool {.compileTime.} =
  ## Compile-time check: yields ``true`` when one of the children of the second
  ## node of ``getType(t)`` is an ``nnkRecCase`` node, ``false`` otherwise
  ## (including when ``getType(t)`` has fewer than two nodes).
  # NOTE(review): presumably the second node is the field list of an object
  # type, so a hit means the type has a ``case`` clause - confirm.
  let desc = getType(t)
  if desc.len <= 1:
    return false
  for node in desc[1].children:
    if node.kind == nnkRecCase:
      return true
  return false
proc yamlTag*(T: typedesc[object|enum]):
TagId {.inline, raises: [].} =
var uri = "!nim:custom:" & (typetraits.name(type(T)))
@ -415,27 +407,60 @@ proc yamlTag*(T: typedesc[tuple]):
try: serializationTagLibrary.tags[uri]
except KeyError: serializationTagLibrary.registerUri(uri)
macro constructFieldValue(t: typedesc, stream: expr, context: expr,
                           name: expr, o: expr): stmt =
  ## Generates a ``case`` statement over ``name`` with one ``of`` branch per
  ## field of the object type ``t``. Each branch calls ``constructChild`` to
  ## load the value of that field of ``o`` from ``stream``.
  ##
  ## - For a discriminator field, the value is first loaded into a temporary
  ##   variable, then ``o`` is reset and the discriminator is assigned.
  ## - For a field inside a variant branch, the value is only loaded when the
  ##   discriminator currently selects that branch; otherwise the generated
  ##   code raises ``YamlConstructionError``.
  let tDesc = getType(getType(t)[1])
  result = newNimNode(nnkCaseStmt).add(name)
  for child in tDesc[1].children:
    if child.kind == nnkRecCase:
      let
        discriminant = newDotExpr(o, newIdentNode($child[0]))
        discType = newCall("type", discriminant)
      # of "<discriminator name>":
      #   var value: type(o.<discriminator>)
      #   constructChild(stream, context, value)
      #   reset(o)
      #   o.<discriminator> = value
      var disOb = newNimNode(nnkOfBranch).add(newStrLitNode($child[0]))
      disOb.add(newStmtList(
          newNimNode(nnkVarSection).add(
              newNimNode(nnkIdentDefs).add(
                  newIdentNode("value"), discType, newEmptyNode())),
          newCall("constructChild", stream, context, newIdentNode("value")),
          newCall("reset", o),
          newAssignment(discriminant, newIdentNode("value"))))
      result.add(disOb)
      for bIndex in 1 .. len(child) - 1:
        let discTest = infix(discriminant, "==", child[bIndex][0])
        for item in child[bIndex][1].children:
          assert item.kind == nnkSym
          var ob = newNimNode(nnkOfBranch).add(newStrLitNode($item))
          let field = newDotExpr(o, newIdentNode($item))
          var ifStmt = newIfStmt((cond: discTest, body: newStmtList(
              newCall("constructChild", stream, context, field))))
          # The field belongs to a branch that the current discriminator value
          # does not select. Raise instead of silently skipping: skipping
          # would leave the field's value unconsumed in the stream.
          let wrongBranchError = newNimNode(nnkRaiseStmt).add(
              newCall("newException", newIdentNode("YamlConstructionError"),
                  infix(newStrLitNode("Field " & $item &
                      " not allowed for " & $child[0] & " == "), "&",
                      newCall("$", discriminant))))
          ifStmt.add(newNimNode(nnkElse).add(newStmtList(wrongBranchError)))
          ob.add(newStmtList(ifStmt))
          result.add(ob)
    else:
      assert child.kind == nnkSym
      var ob = newNimNode(nnkOfBranch).add(newStrLitNode($child))
      let field = newDotExpr(o, newIdentNode($child))
      ob.add(newStmtList(newCall("constructChild", stream, context, field)))
      result.add(ob)
proc constructObject*[O: object|tuple](
s: var YamlStream, c: ConstructionContext, result: var O)
{.raises: [YamlConstructionError, YamlStreamError].} =
## constructs a Nim object or tuple from a YAML mapping
##
## Takes the next event from `s` and raises `YamlConstructionError` unless it
## is a map start. Then, until the next event is a map end, reads a scalar
## event as field name (raising `YamlConstructionError` for any other event
## kind) and loads the matching field of `result`. Finally the map end event
## is consumed.
let e = s.next()
if e.kind != yamlStartMap:
# NOTE(review): two `raise` statements follow each other here; only the first
# one can execute. This looks like the removed and the added variant of the
# same statement from a diff - confirm which one is current.
raise newException(YamlConstructionError, "Expected map start, got " &
$e.kind)
raise newException(YamlConstructionError, "While constructing " &
typetraits.name(O) & ": Expected map start, got " & $e.kind)
while s.peek.kind != yamlEndMap:
# todo: check for duplicates in input and raise appropriate exception
# also todo: check for missing items and raise appropriate exception
let e = s.next()
if e.kind != yamlScalar:
raise newException(YamlConstructionError,
"Expected field name, got " & $e.kind)
let name = e.scalarContent
# NOTE(review): both a `fieldPairs` lookup and a `constructFieldValue` call
# are present below; presumably one of them is the removed variant from a
# diff - confirm before relying on the nesting of these lines.
when compiles(implicitVariantObject(O)):
discard
else:
for fname, value in fieldPairs(result):
if fname == name:
constructChild(s, c, value)
break
constructFieldValue(O, s, c, name, result)
# consume the event that ended the loop (the map end)
discard s.next()
proc representObject*[O: object|tuple](value: O, ts: TagStyle,
@ -478,12 +503,92 @@ proc representObject*[O: enum](value: O, ts: TagStyle,
proc yamlTag*[O](T: typedesc[ref O]): TagId {.inline, raises: [].} =
  ## The tag of a ``ref O`` type is the tag of the type ``O`` it refers to.
  yamlTag(O)
macro constructImplicitVariantObject(s, c, r, possibleTagIds: expr,
                                     t: typedesc): stmt =
  ## Generates the code that loads a value into the implicit variant object
  ## ``r`` of type ``t``.
  ##
  ## The generated code first resets ``r`` and then runs an ``if`` / ``elif``
  ## chain with one branch per ``of`` branch of the type's ``case`` clause:
  ##
  ## - a branch without a field is taken when ``yTagNull`` is in
  ##   ``possibleTagIds``; it sets the discriminator and discards the next
  ##   event of ``s``.
  ## - a branch with one field is taken when the ``yamlTag`` of that field's
  ##   type is in ``possibleTagIds``; it sets the discriminator and calls
  ##   ``constructChild`` on the field.
  ##
  ## If no branch matches, the generated code raises
  ## ``YamlConstructionError``, naming the URI of ``possibleTagIds[0]``.
  let tDesc = getType(getType(t)[1])
  assert tDesc.kind == nnkObjectTy
  # the first (and, for implicit types, presumably only) entry of the field
  # list must be the case clause
  let recCase = tDesc[1][0]
  assert recCase.kind == nnkRecCase
  let
    discriminant = newDotExpr(r, newIdentNode($recCase[0]))
    # NOTE(review): discType is not referenced anywhere below
    discType = newCall("type", discriminant)
  var ifStmt = newNimNode(nnkIfStmt)
  # index 0 of recCase is the discriminator, branches start at index 1
  for i in 1 .. recCase.len - 1:
    assert recCase[i].kind == nnkOfBranch
    var branch = newNimNode(nnkElifBranch)
    # every branch starts by setting the discriminator to the branch's value
    var branchContent = newStmtList(newAssignment(discriminant, recCase[i][0]))
    case recCase[i][1].len
    of 0:
      # branch without a field: matches a null value, whose event is dropped
      branch.add(infix(newIdentNode("yTagNull"), "in", possibleTagIds))
      branchContent.add(newNimNode(nnkDiscardStmt).add(newCall("next", s)))
    of 1:
      # branch with one field: matches when the field type's tag is possible
      let field = newDotExpr(r, newIdentNode($recCase[i][1][0]))
      branch.add(infix(
          newCall("yamlTag", newCall("type", field)), "in", possibleTagIds))
      branchContent.add(newCall("constructChild", s, c, field))
    else: assert false
    branch.add(branchContent)
    ifStmt.add(branch)
  # fallback when no branch matched: raise, naming the first possible tag
  let raiseStmt = newNimNode(nnkRaiseStmt).add(
      newCall("newException", newIdentNode("YamlConstructionError"),
      infix(newStrLitNode("This value type does not map to any field in " &
                          typetraits.name(t) & ": "), "&",
            newCall("uri", newIdentNode("serializationTagLibrary"),
              newNimNode(nnkBracketExpr).add(possibleTagIds, newIntLitNode(0)))
      )
  ))
  # the raise is wrapped in try/except KeyError with ``assert(false)`` as
  # handler
  # NOTE(review): presumably because the ``uri`` lookup may raise KeyError -
  # confirm
  ifStmt.add(newNimNode(nnkElse).add(newNimNode(nnkTryStmt).add(
      newStmtList(raiseStmt), newNimNode(nnkExceptBranch).add(
        newIdentNode("KeyError"), newStmtList(newCall("assert", newLit(false)))
  ))))
  result = newStmtList(newCall("reset", r), ifStmt)
proc constructChild*[T](s: var YamlStream, c: ConstructionContext,
result: var T) =
let item = s.peek()
when compiles(implicitVariantObject(result)):
var possibleTagIds = newSeq[TagId]()
case item.kind
of yamlScalar:
if item.scalarTag notin [yTagQuestionMark, yTagExclamationMark, yamlTag(T)]:
case item.scalarTag
of yTagQuestionMark:
case guessType(item.scalarContent)
of yTypeInteger:
possibleTagIds.add([yamlTag(int), yamlTag(int8), yamlTag(int16),
yamlTag(int32), yamlTag(int64)])
if item.scalarContent[0] != '-':
possibleTagIds.add([yamlTag(uint), yamlTag(uint8), yamlTag(uint16),
yamlTag(uint32), yamlTag(uint64)])
of yTypeFloat, yTypeFloatInf, yTypeFloatNaN:
possibleTagIds.add([yamlTag(float), yamlTag(float32),
yamlTag(float64)])
of yTypeBoolTrue, yTypeBoolFalse:
possibleTagIds.add(yamlTag(bool))
of yTypeNull:
raise newException(YamlConstructionError, "not implemented!")
of yTypeUnknown:
possibleTagIds.add(yamlTag(string))
of yTagExclamationMark:
possibleTagIds.add(yamlTag(string))
else:
possibleTagIds.add(item.scalarTag)
of yamlStartMap:
if item.mapTag in [yTagQuestionMark, yTagExclamationMark]:
raise newException(YamlConstructionError,
"Complex value of implicit variant object type must have a tag.")
possibleTagIds.add(item.mapTag)
of yamlStartSeq:
if item.seqTag in [yTagQuestionMark, yTagExclamationMark]:
raise newException(YamlConstructionError,
"Complex value of implicit variant object type must have a tag.")
possibleTagIds.add(item.seqTag)
else: assert false
constructImplicitVariantObject(s, c, result, possibleTagIds, T)
else:
case item.kind
of yamlScalar:
if item.scalarTag notin [yTagQuestionMark, yTagExclamationMark,
yamlTag(T)]:
raise newException(YamlConstructionError, "Wrong tag for " &
typetraits.name(T))
elif item.scalarAnchor != yAnchorNone:
@ -636,6 +741,18 @@ proc representChild*[O](value: ref O, ts: TagStyle, c: SerializationContext):
proc representChild*[O](value: O, ts: TagStyle,
                        c: SerializationContext): RawYamlStream =
  ## Represents ``value`` as a YAML event stream.
  ##
  ## If ``implicitVariantObject(value)`` compiles, every field of ``value``
  ## except the first one yielded by ``fieldPairs`` is represented via
  ## ``representChild``; if ``fieldPairs`` yields exactly one field, the
  ## result is a single ``~`` scalar tagged ``yTagNull``. Otherwise the value
  ## is passed on to ``representObject``.
  when compiles(implicitVariantObject(value)):
    # todo: this would probably be nicer if constructed with a macro
    let childStyle = if ts == tsAll: tsAll else: tsRootOnly
    var fieldsSeen = 0
    for name, field in fieldPairs(value):
      # the first field (presumably the discriminator) is never represented
      if fieldsSeen != 0:
        result = representChild(field, childStyle, c)
      fieldsSeen += 1
    if fieldsSeen == 1:
      result = iterator(): YamlStreamEvent =
        yield scalarEvent("~", yTagNull)
  else:
    let tag = if ts == tsNone: yTagQuestionMark else: yamlTag(O)
    result = representObject(value, ts, c, tag)
@ -675,7 +792,7 @@ proc load*[K](input: Stream, target: var K) =
let e = (ref YamlStreamError)(getCurrentException())
if e.parent of IOError: raise (ref IOError)(e.parent)
elif e.parent of YamlParserError: raise (ref YamlParserError)(e.parent)
else: assert(false)
else: assert false
proc setAnchor(a: var AnchorId, q: var Table[pointer, AnchorId])
{.inline.} =

View File

@ -693,6 +693,32 @@ template setTagUri*(t: typedesc, uri: string, idName: expr): stmt =
proc yamlTag*(T: typedesc[t]): TagId {.inline, raises: [].} = idName
## autogenerated
proc canBeImplicit(t: typedesc): bool {.compileTime.} =
  ## Compile-time check whether ``t`` may be marked as implicit variant object
  ## type. This holds when ``getType(t)`` is an object type whose field list
  ## consists of exactly one ``case`` clause, every branch of that clause has
  ## at most one entry, and at most one branch has no entry at all.
  let objDesc = getType(t)
  if objDesc.kind != nnkObjectTy: return false
  let fieldList = objDesc[1]
  if fieldList.len != 1 or fieldList[0].kind != nnkRecCase: return false
  let recCase = fieldList[0]
  var emptyBranchSeen = false
  # index 0 is skipped; branches are inspected from index 1 on
  for branchIdx in 1 .. recCase.len - 1:
    let entryCount = recCase[branchIdx][1].len # branch contents
    if entryCount > 1: return false
    if entryCount == 0:
      if emptyBranchSeen: return false
      emptyBranchSeen = true
  return true
template markAsImplicit*(t: typedesc): stmt =
  ## Marks the variant object type ``t`` as implicit. The type must consist of
  ## nothing but a single ``case`` clause in which every branch contains
  ## exactly one field, except that one branch may contain no field at all.
  ## Compilation is aborted with a fatal error if ``canBeImplicit(t)`` does
  ## not hold.
  when not canBeImplicit(t):
    {. fatal: "This type cannot be marked as implicit" .}
  else:
    # marker proc; its presence is probed elsewhere by means of
    # compiles(implicitVariantObject(...))
    proc implicitVariantObject*(unused: t) = discard
static:
# standard YAML tags used by serialization
registeredUris.add("!")