1679 lines
41 KiB
Go
1679 lines
41 KiB
Go
// Copyright 2015 Jean Niklas L'orange. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package edn
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"math/big"
|
|
"reflect"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// Errors raised internally by the decoder. The message texts are observable
// through Error() and are therefore kept verbatim.
var (
	// errInternal marks a parser state the decoder does not expect to reach.
	errInternal = errors.New("Illegal internal parse state")

	// errNoneLeft reports that there are no more tokens to read.
	errNoneLeft = errors.New("No more tokens to read")

	// errUnexpected reports a token that is not valid where it was found.
	errUnexpected = errors.New("Unexpected token")

	// errIllegalRune is presumably raised for malformed character literals;
	// it is not used in the part of the file that defines it.
	errIllegalRune = errors.New("Illegal rune form")
)
|
|
|
|
// UnknownTagError is raised when a tagged element has to be decoded into a
// concrete Go type but no tag function is registered for the tag.
type UnknownTagError struct {
	// tag holds the raw tag token, including the leading '#'.
	tag []byte
	// value holds the raw bytes of the tagged value.
	value []byte
	// inType is the Go type the decoder was trying to fill.
	inType reflect.Type
}

// Error implements the error interface. The tag and value bytes are printed
// back to back, followed by the target type.
func (ute UnknownTagError) Error() string {
	const layout = "Unable to decode %s%s into %s"
	return fmt.Sprintf(layout, ute.tag, ute.value, ute.inType)
}
|
|
|
|
// Unmarshal parses the EDN-encoded data and stores the result in the value
|
|
// pointed to by v.
|
|
//
|
|
// Unmarshal uses the inverse of the encodings that Marshal uses, allocating
|
|
// maps, slices, and pointers as necessary, with the following additional rules:
|
|
//
|
|
// First, if the value to store the result into implements edn.Unmarshaler, it
|
|
// is called.
|
|
//
|
|
// If the value is tagged and the tag is known, the EDN value is translated into
|
|
// the input of the tag convert function. If no error happens during converting,
|
|
// the result of the conversion is then coerced into v if possible.
|
|
//
|
|
// To unmarshal EDN into a pointer, Unmarshal first handles the case of the EDN
|
|
// being the EDN literal nil. In that case, Unmarshal sets the pointer to nil.
|
|
// Otherwise, Unmarshal unmarshals the EDN into the value pointed at by the
|
|
// pointer. If the pointer is nil, Unmarshal allocates a new value for it to
|
|
// point to.
|
|
//
|
|
// To unmarshal EDN into a struct, Unmarshal matches incoming object
|
|
// keys to the keys used by Marshal (either the struct field name or its tag),
|
|
// preferring an exact match but also accepting a case-insensitive match.
|
|
//
|
|
// To unmarshal EDN into an interface value,
|
|
// Unmarshal stores one of these in the interface value:
|
|
//
|
|
// bool, for EDN booleans
|
|
// float64, for EDN floats
|
|
// int64, for EDN integers
|
|
// int32, for EDN characters
|
|
// string, for EDN strings
|
|
// []interface{}, for EDN vectors and lists
|
|
// map[interface{}]interface{}, for EDN maps
|
|
// map[interface{}]bool, for EDN sets
|
|
// nil for EDN nil
|
|
// edn.Tag for unknown EDN tagged elements
|
|
// T for known EDN tagged elements, where T is the result of the converter function
|
|
//
|
|
// To unmarshal an EDN vector/list into a slice, Unmarshal resets the slice to
|
|
// nil and then appends each element to the slice.
|
|
//
|
|
// To unmarshal an EDN map into a Go map, Unmarshal replaces the map
|
|
// with an empty map and then adds key-value pairs from the object to
|
|
// the map.
|
|
//
|
|
// If a EDN value is not appropriate for a given target type, or if a EDN number
|
|
// overflows the target type, Unmarshal skips that field and completes the
|
|
// unmarshalling as best it can. If no more serious errors are encountered,
|
|
// Unmarshal returns an UnmarshalTypeError describing the earliest such error.
|
|
//
|
|
// The EDN nil value unmarshals into an interface, map, pointer, or slice by
|
|
// setting that Go value to nil.
|
|
//
|
|
// When unmarshaling strings, invalid UTF-8 or invalid UTF-16 surrogate pairs
|
|
// are not treated as an error. Instead, they are replaced by the Unicode
|
|
// replacement character U+FFFD.
|
|
//
|
|
func Unmarshal(data []byte, v interface{}) error {
|
|
return newDecoder(bufio.NewReader(bytes.NewBuffer(data))).Decode(v)
|
|
}
|
|
|
|
// UnmarshalString works like Unmarshal, but accepts a string as input instead
|
|
// of a byte slice.
|
|
func UnmarshalString(data string, v interface{}) error {
|
|
return newDecoder(bufio.NewReader(bytes.NewBufferString(data))).Decode(v)
|
|
}
|
|
|
|
// NewDecoder returns a new decoder that reads from r.
|
|
//
|
|
// The decoder introduces its own buffering and may read data from r beyond the
|
|
// EDN values requested.
|
|
func NewDecoder(r io.Reader) *Decoder {
|
|
return newDecoder(bufio.NewReader(r))
|
|
}
|
|
|
|
// Buffered returns a reader of the data remaining in the Decoder's buffer. The
|
|
// reader is valid until the next call to Decode.
|
|
func (d *Decoder) Buffered() *bufio.Reader {
|
|
return d.rd
|
|
}
|
|
|
|
// AddTagFn adds a tag function to the decoder's TagMap. Note that TagMaps are
|
|
// mutable: If Decoder A and B share TagMap, then adding a tag function to one
|
|
// may modify both.
|
|
func (d *Decoder) AddTagFn(tagname string, fn interface{}) error {
|
|
return d.tagmap.AddTagFn(tagname, fn)
|
|
}
|
|
|
|
// MustAddTagFn adds a tag function to the decoder's TagMap like AddTagFn,
|
|
// except this function also panics if the tag could not be added.
|
|
func (d *Decoder) MustAddTagFn(tagname string, fn interface{}) {
|
|
d.tagmap.MustAddTagFn(tagname, fn)
|
|
}
|
|
|
|
// AddTagStruct adds a tag struct to the decoder's TagMap. Note that TagMaps are
|
|
// mutable: If Decoder A and B share TagMap, then adding a tag struct to one
|
|
// may modify both.
|
|
func (d *Decoder) AddTagStruct(tagname string, example interface{}) error {
|
|
return d.tagmap.AddTagStruct(tagname, example)
|
|
}
|
|
|
|
// UseTagMap sets the TagMap provided as the TagMap for this decoder.
|
|
func (d *Decoder) UseTagMap(tm *TagMap) {
|
|
d.tagmap = tm
|
|
}
|
|
|
|
// UseMathContext sets the given math context as default math context for this
|
|
// decoder.
|
|
func (d *Decoder) UseMathContext(mc MathContext) {
|
|
d.mc = &mc
|
|
}
|
|
|
|
func (d *Decoder) mathContext() *MathContext {
|
|
if d.mc != nil {
|
|
return d.mc
|
|
}
|
|
return &GlobalMathContext
|
|
}
|
|
|
|
// DisallowUnknownFields causes the Decoder to return an error when the
|
|
// destination is a struct and the input contains keys which do not match any
|
|
// non-ignored, exported fields in the destination.
|
|
func (d *Decoder) DisallowUnknownFields() {
|
|
d.disallowUnknownFields = true
|
|
}
|
|
|
|
// Unmarshaler is the interface implemented by objects that can unmarshal an EDN
// description of themselves. The input can be assumed to be a valid encoding of
// an EDN value. UnmarshalEDN must copy the EDN data if it wishes to retain the
// data after returning.
type Unmarshaler interface {
	// UnmarshalEDN receives the raw bytes of a single EDN value.
	UnmarshalEDN(data []byte) error
}
|
|
|
|
// parseState names the syntactic context the parser is currently in.
type parseState int

// Parse contexts, numbered from zero in declaration order.
//
// NOTE(review): the constants are declared without an explicit type, so they
// are untyped integer constants rather than parseState values. Left as is
// because other code may rely on it.
const (
	parseToplevel = iota
	parseList
	parseVector
	parseMap
	parseSet
	parseTagged
	parseDiscard
)
|
|
|
|
// A Decoder reads and decodes EDN objects from an input stream.
|
|
type Decoder struct {
|
|
disallowUnknownFields bool
|
|
|
|
lex *lexer
|
|
savedError error
|
|
rd *bufio.Reader
|
|
tagmap *TagMap
|
|
mc *MathContext
|
|
// parser-specific
|
|
prevSlice []byte
|
|
prevTtype tokenType
|
|
undo bool
|
|
// if nextToken returned lexEndPrev, we must write the leftover value at
|
|
// next call to nextToken
|
|
hasLeftover bool
|
|
leftover rune
|
|
}
|
|
|
|
// An InvalidUnmarshalError describes an invalid argument passed to Unmarshal.
// (The argument to Unmarshal must be a non-nil pointer.)
type InvalidUnmarshalError struct {
	// Type is the type of the rejected argument; nil for an untyped nil.
	Type reflect.Type
}

// Error implements the error interface. It distinguishes three cases: an
// untyped nil argument, a non-pointer argument, and a nil pointer argument.
// All three messages carry the "edn:" package prefix.
func (e *InvalidUnmarshalError) Error() string {
	switch {
	case e.Type == nil:
		return "edn: Unmarshal(nil)"
	case e.Type.Kind() != reflect.Ptr:
		return "edn: Unmarshal(non-pointer " + e.Type.String() + ")"
	default:
		return "edn: Unmarshal(nil " + e.Type.String() + ")"
	}
}
|
|
|
|
// An UnmarshalTypeError describes an EDN value that was
// not appropriate for a value of a specific Go type.
type UnmarshalTypeError struct {
	// Value is a description of the EDN value - "bool", "array", "number -5".
	Value string
	// Type is the type of Go value it could not be assigned to.
	Type reflect.Type
}

// Error implements the error interface. Type must not be nil.
func (e *UnmarshalTypeError) Error() string {
	target := e.Type.String()
	return "edn: cannot unmarshal " + e.Value + " into Go value of type " + target
}
|
|
|
|
// UnhashableError is an error which occurs when the decoder attempted to assign
// an unhashable key to a map or set. The position close to where value was
// found is provided to help debugging.
type UnhashableError struct {
	// Position is the lexer position recorded when the error was detected.
	Position int64
}

// Error implements the error interface, printing Position in base 10.
func (e *UnhashableError) Error() string {
	pos := strconv.FormatInt(e.Position, 10)
	return "edn: unhashable type at position " + pos + " in input"
}
|
|
|
|
// UnknownFieldError is raised while decoding a map into a struct when a key
// has no matching struct field and the decoder was told to reject such keys
// with DisallowUnknownFields.
type UnknownFieldError struct {
	// Field is the field name.
	Field string
	// Type is the type of the Go struct with the missing field.
	Type reflect.Type
}

// Error implements the error interface. Type must not be nil.
func (e *UnknownFieldError) Error() string {
	owner := e.Type.String()
	return "edn: cannot find a field '" + e.Field + "' in a struct " + owner + " to unmarshal into"
}
|
|
|
|
// Decode reads the next EDN-encoded value from its input and stores it in the
|
|
// value pointed to by v.
|
|
//
|
|
// See the documentation for Unmarshal for details about the conversion of EDN
|
|
// into a Go value.
|
|
func (d *Decoder) Decode(val interface{}) (err error) {
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
// if unhashable, return ErrUnhashable. Else panic unless it's an error
|
|
// from the decoder itself.
|
|
if rerr, ok := r.(runtime.Error); ok {
|
|
if strings.Contains(rerr.Error(), "unhashable") {
|
|
err = &UnhashableError{Position: d.lex.position}
|
|
} else {
|
|
panic(r)
|
|
}
|
|
} else {
|
|
err = r.(error)
|
|
}
|
|
}
|
|
}()
|
|
|
|
err = d.more()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
rv := reflect.ValueOf(val)
|
|
if rv.Kind() != reflect.Ptr || rv.IsNil() {
|
|
return &InvalidUnmarshalError{reflect.TypeOf(val)}
|
|
}
|
|
|
|
d.value(rv)
|
|
|
|
return nil
|
|
}
|
|
|
|
func newDecoder(buf *bufio.Reader) *Decoder {
|
|
lex := lexer{}
|
|
lex.reset()
|
|
return &Decoder{
|
|
lex: &lex,
|
|
rd: buf,
|
|
hasLeftover: false,
|
|
leftover: '\uFFFD',
|
|
tagmap: new(TagMap),
|
|
}
|
|
}
|
|
|
|
func (d *Decoder) getTagFn(tagname string) *reflect.Value {
|
|
d.tagmap.RLock()
|
|
f, ok := d.tagmap.m[tagname]
|
|
d.tagmap.RUnlock()
|
|
if ok {
|
|
return &f
|
|
}
|
|
globalTags.RLock()
|
|
f, ok = globalTags.m[tagname]
|
|
globalTags.RUnlock()
|
|
if ok {
|
|
return &f
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (d *Decoder) error(err error) {
|
|
panic(err)
|
|
}
|
|
|
|
func (d *Decoder) doUndo(bs []byte, ttype tokenType) {
|
|
if d.undo {
|
|
d.error(errInternal) // this is LL(1), so this shouldn't happen
|
|
}
|
|
d.undo = true
|
|
d.prevSlice = bs
|
|
d.prevTtype = ttype
|
|
}
|
|
|
|
// array consumes an array from d.data[d.off-1:], decoding into the value v.
|
|
// the first byte of the array ('[') has been read already.
|
|
func (d *Decoder) array(v reflect.Value, endType tokenType) {
|
|
// Check for unmarshaler.
|
|
u, pv := d.indirect(v, false)
|
|
if u != nil {
|
|
switch endType {
|
|
case tokenVectorEnd:
|
|
d.doUndo([]byte{'['}, tokenVectorStart)
|
|
case tokenListEnd:
|
|
d.doUndo([]byte{'('}, tokenListStart)
|
|
case tokenSetEnd:
|
|
d.doUndo([]byte{'#', '{'}, tokenSetStart)
|
|
}
|
|
bs, err := d.nextValueBytes()
|
|
if err == nil {
|
|
err = u.UnmarshalEDN(bs)
|
|
}
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
return
|
|
}
|
|
v = pv
|
|
|
|
// Check type of target.
|
|
switch v.Kind() {
|
|
case reflect.Interface:
|
|
if v.NumMethod() == 0 {
|
|
// Decoding into nil interface? Switch to non-reflect code.
|
|
v.Set(reflect.ValueOf(d.arrayInterface(endType)))
|
|
return
|
|
}
|
|
// Otherwise it's invalid.
|
|
fallthrough
|
|
default:
|
|
d.error(&UnmarshalTypeError{"array", v.Type()})
|
|
return
|
|
case reflect.Array:
|
|
case reflect.Slice:
|
|
break
|
|
}
|
|
|
|
i := 0
|
|
for {
|
|
// Look ahead for ] - can only happen on first iteration.
|
|
bs, ttype, err := d.nextToken()
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
if ttype == endType {
|
|
break
|
|
}
|
|
d.doUndo(bs, ttype)
|
|
|
|
// Get element of array, growing if necessary.
|
|
if v.Kind() == reflect.Slice {
|
|
// Grow slice if necessary
|
|
if i >= v.Cap() {
|
|
newcap := v.Cap() + v.Cap()/2
|
|
if newcap < 4 {
|
|
newcap = 4
|
|
}
|
|
newv := reflect.MakeSlice(v.Type(), v.Len(), newcap)
|
|
reflect.Copy(newv, v)
|
|
v.Set(newv)
|
|
}
|
|
if i >= v.Len() {
|
|
v.SetLen(i + 1)
|
|
}
|
|
}
|
|
|
|
if i < v.Len() {
|
|
// Decode into element.
|
|
d.value(v.Index(i))
|
|
} else {
|
|
// Ran out of fixed array: skip.
|
|
d.value(reflect.Value{})
|
|
}
|
|
i++
|
|
}
|
|
|
|
if i < v.Len() {
|
|
if v.Kind() == reflect.Array {
|
|
// Array. Zero the rest.
|
|
z := reflect.Zero(v.Type().Elem())
|
|
for ; i < v.Len(); i++ {
|
|
v.Index(i).Set(z)
|
|
}
|
|
} else {
|
|
v.SetLen(i)
|
|
}
|
|
}
|
|
if i == 0 && v.Kind() == reflect.Slice {
|
|
v.Set(reflect.MakeSlice(v.Type(), 0, 0))
|
|
}
|
|
}
|
|
|
|
func (d *Decoder) arrayInterface(endType tokenType) interface{} {
|
|
var v = make([]interface{}, 0)
|
|
for {
|
|
// look out for endType
|
|
bs, tt, err := d.nextToken()
|
|
if err != nil {
|
|
d.error(err)
|
|
break
|
|
}
|
|
if tt == endType {
|
|
break
|
|
}
|
|
d.doUndo(bs, tt)
|
|
v = append(v, d.valueInterface())
|
|
}
|
|
return v
|
|
}
|
|
|
|
func (d *Decoder) value(v reflect.Value) {
|
|
if !v.IsValid() {
|
|
// read value and ignore it
|
|
d.valueInterface()
|
|
return
|
|
}
|
|
|
|
bs, ttype, err := d.nextToken()
|
|
// check error first
|
|
if err != nil {
|
|
d.error(err)
|
|
return
|
|
}
|
|
switch ttype {
|
|
default:
|
|
d.error(errUnexpected)
|
|
case tokenSymbol, tokenKeyword, tokenString, tokenInt, tokenFloat, tokenChar:
|
|
d.literal(bs, ttype, v)
|
|
case tokenTag:
|
|
d.tag(bs, v)
|
|
case tokenListStart:
|
|
d.array(v, tokenListEnd)
|
|
case tokenVectorStart:
|
|
d.array(v, tokenVectorEnd)
|
|
case tokenSetStart:
|
|
d.set(v)
|
|
case tokenMapStart:
|
|
d.ednmap(v)
|
|
}
|
|
}
|
|
|
|
func (d *Decoder) tag(tag []byte, v reflect.Value) {
|
|
// Check for unmarshaler.
|
|
u, pv := d.indirect(v, false)
|
|
if u != nil {
|
|
bs, err := d.nextValueBytes()
|
|
if err == nil {
|
|
err = u.UnmarshalEDN(append(append(tag, ' '), bs...))
|
|
}
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
return
|
|
}
|
|
v = pv
|
|
|
|
if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
|
|
v.Set(reflect.ValueOf(d.tagInterface(tag)))
|
|
return
|
|
}
|
|
|
|
fn := d.getTagFn(string(tag[1:]))
|
|
if fn == nil {
|
|
// So in theory we'd have to match against any interface that could be
|
|
// assignable to the Tag type, to ensure we would decode whenever possible.
|
|
// That is any interface that specifies any combination of the methods
|
|
// MarshalEDN, UnmarshalEDN and String. I'm not sure if that makes sense
|
|
// though, so I've punted this for now.
|
|
bs, err := d.nextValueBytes()
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
d.error(UnknownTagError{tag, bs, v.Type()})
|
|
} else {
|
|
tfn := fn.Type()
|
|
var result reflect.Value
|
|
// if not func, just match on struct shape
|
|
if tfn.Kind() != reflect.Func {
|
|
result = reflect.New(tfn).Elem()
|
|
d.value(result)
|
|
} else { // otherwise match on input value and call the function
|
|
inVal := reflect.New(tfn.In(0))
|
|
d.value(inVal)
|
|
res := fn.Call([]reflect.Value{inVal.Elem()})
|
|
if err, ok := res[1].Interface().(error); ok && err != nil {
|
|
d.error(err)
|
|
}
|
|
result = res[0]
|
|
}
|
|
// result is not necessarily direct, so we have to make it direct, but
|
|
// *only* if it's NOT null at every step. Which leads to the question: How
|
|
// do we unify these values? This is particularly hairy if these are double
|
|
// pointers or bigger.
|
|
|
|
// Currently we only attempt to solve this for results by checking if the
|
|
// result can be dereferenced into a value. The value will always be a
|
|
// non-pointer, so presumably we can assign it in this fashion as a
|
|
// temporary resolution.
|
|
if result.Type().AssignableTo(v.Type()) {
|
|
v.Set(result)
|
|
return
|
|
}
|
|
if result.Kind() == reflect.Ptr && !result.IsNil() &&
|
|
result.Elem().Type().AssignableTo(v.Type()) {
|
|
// is res a non-nil pointer to a value we can assign to? If yes, then
|
|
// let's just do that.
|
|
v.Set(result.Elem())
|
|
return
|
|
}
|
|
d.error(fmt.Errorf("Cannot assign %s to %s (tag issue?)", result.Type(), v.Type()))
|
|
}
|
|
}
|
|
|
|
func (d *Decoder) tagInterface(tag []byte) interface{} {
|
|
fn := d.getTagFn(string(tag[1:]))
|
|
if fn == nil {
|
|
var t Tag
|
|
t.Tagname = string(tag[1:])
|
|
t.Value = d.valueInterface()
|
|
return t
|
|
} else if fn.Type().Kind() != reflect.Func {
|
|
res := reflect.New(fn.Type()).Elem()
|
|
d.value(res)
|
|
return res.Interface()
|
|
} else {
|
|
tfn := fn.Type()
|
|
val := reflect.New(tfn.In(0))
|
|
d.value(val)
|
|
res := fn.Call([]reflect.Value{val.Elem()})
|
|
if err, ok := res[1].Interface().(error); ok && err != nil {
|
|
d.error(err)
|
|
}
|
|
return res[0].Interface()
|
|
}
|
|
}
|
|
|
|
func (d *Decoder) valueInterface() interface{} {
|
|
bs, ttype, err := d.nextToken()
|
|
// check error first
|
|
if err != nil {
|
|
d.error(err)
|
|
return nil /// won't get here
|
|
}
|
|
switch ttype {
|
|
default:
|
|
d.error(errUnexpected)
|
|
return nil
|
|
case tokenSymbol, tokenKeyword, tokenString, tokenInt, tokenFloat, tokenChar:
|
|
return d.literalInterface(bs, ttype)
|
|
case tokenTag:
|
|
return d.tagInterface(bs)
|
|
case tokenListStart:
|
|
return d.arrayInterface(tokenListEnd)
|
|
case tokenVectorStart:
|
|
return d.arrayInterface(tokenVectorEnd)
|
|
case tokenSetStart:
|
|
return d.setInterface()
|
|
case tokenMapStart:
|
|
return d.ednmapInterface()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (d *Decoder) ednmap(v reflect.Value) {
|
|
// Check for unmarshaler.
|
|
u, pv := d.indirect(v, false)
|
|
if u != nil {
|
|
d.doUndo([]byte{'{'}, tokenMapStart)
|
|
bs, err := d.nextValueBytes()
|
|
if err == nil {
|
|
err = u.UnmarshalEDN(bs)
|
|
}
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
return
|
|
}
|
|
v = pv
|
|
|
|
if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
|
|
v.Set(reflect.ValueOf(d.ednmapInterface()))
|
|
return
|
|
}
|
|
|
|
var keyType reflect.Type
|
|
|
|
// Check type of target: Struct or map[T]U
|
|
switch v.Kind() {
|
|
case reflect.Map:
|
|
t := v.Type()
|
|
keyType = t.Key()
|
|
if v.IsNil() {
|
|
v.Set(reflect.MakeMap(t))
|
|
}
|
|
case reflect.Struct:
|
|
|
|
default:
|
|
d.error(&UnmarshalTypeError{"map", v.Type()})
|
|
}
|
|
|
|
// separate these to ease reading (theoretically fewer checks too)
|
|
if v.Kind() == reflect.Struct {
|
|
for {
|
|
bs, tt, err := d.nextToken()
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
if tt == tokenSetEnd {
|
|
break
|
|
}
|
|
skip := false
|
|
var key []byte
|
|
// The key can either be a symbol, a keyword or a string. We will skip
|
|
// anything that is not any of these values.
|
|
switch tt {
|
|
case tokenSymbol:
|
|
if bytes.Equal(bs, falseByte) || bytes.Equal(bs, trueByte) || bytes.Equal(bs, nilByte) {
|
|
skip = true
|
|
}
|
|
key = bs
|
|
case tokenKeyword:
|
|
key = bs[1:]
|
|
case tokenString:
|
|
k, ok := unquoteBytes(bs)
|
|
key = k
|
|
if !ok {
|
|
d.error(errInternal)
|
|
}
|
|
default:
|
|
skip = true
|
|
}
|
|
|
|
if skip { // will panic if something bad happens, so this is fine
|
|
d.valueInterface()
|
|
continue
|
|
}
|
|
|
|
var subv reflect.Value
|
|
var f *field
|
|
fields := cachedTypeFields(v.Type())
|
|
for i := range fields {
|
|
ff := &fields[i]
|
|
if bytes.Equal(ff.nameBytes, key) {
|
|
f = ff
|
|
break
|
|
}
|
|
if f == nil && ff.equalFold(ff.nameBytes, key) {
|
|
f = ff
|
|
}
|
|
}
|
|
if f != nil {
|
|
subv = v
|
|
for _, i := range f.index {
|
|
if subv.Kind() == reflect.Ptr {
|
|
if subv.IsNil() {
|
|
subv.Set(reflect.New(subv.Type().Elem()))
|
|
}
|
|
subv = subv.Elem()
|
|
}
|
|
subv = subv.Field(i)
|
|
}
|
|
} else if d.disallowUnknownFields {
|
|
d.error(&UnknownFieldError{string(key), v.Type()})
|
|
}
|
|
// If subv not set, value() will just skip.
|
|
d.value(subv)
|
|
}
|
|
// if not struct, then it is a map
|
|
} else if keyType.Kind() == reflect.Interface && keyType.NumMethod() == 0 {
|
|
// special case for unhashable key types
|
|
var mapElem reflect.Value
|
|
for {
|
|
bs, tt, err := d.nextToken()
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
if tt == tokenSetEnd {
|
|
break
|
|
}
|
|
d.doUndo(bs, tt)
|
|
|
|
key := d.valueInterface()
|
|
elemType := v.Type().Elem()
|
|
if !mapElem.IsValid() {
|
|
mapElem = reflect.New(elemType).Elem()
|
|
} else {
|
|
mapElem.Set(reflect.Zero(elemType))
|
|
}
|
|
subv := mapElem
|
|
d.value(subv)
|
|
|
|
if key == nil {
|
|
v.SetMapIndex(reflect.New(keyType).Elem(), subv)
|
|
} else {
|
|
switch reflect.TypeOf(key).Kind() {
|
|
case reflect.Slice, reflect.Map: // bypass issues with unhashable types
|
|
v.SetMapIndex(reflect.ValueOf(&key), subv)
|
|
default:
|
|
v.SetMapIndex(reflect.ValueOf(key), subv)
|
|
}
|
|
}
|
|
}
|
|
} else { // default map case
|
|
var mapElem reflect.Value
|
|
for {
|
|
bs, tt, err := d.nextToken()
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
if tt == tokenSetEnd {
|
|
break
|
|
}
|
|
d.doUndo(bs, tt)
|
|
|
|
// should we do the same as with mapElem?
|
|
key := reflect.New(keyType).Elem()
|
|
d.value(key)
|
|
|
|
elemType := v.Type().Elem()
|
|
if !mapElem.IsValid() {
|
|
mapElem = reflect.New(elemType).Elem()
|
|
} else {
|
|
mapElem.Set(reflect.Zero(elemType))
|
|
}
|
|
subv := mapElem
|
|
d.value(subv)
|
|
v.SetMapIndex(key, subv)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (d *Decoder) ednmapInterface() interface{} {
|
|
theMap := make(map[interface{}]interface{}, 0)
|
|
for {
|
|
bs, tt, err := d.nextToken()
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
if tt == tokenMapEnd {
|
|
break
|
|
}
|
|
d.doUndo(bs, tt)
|
|
key := d.valueInterface()
|
|
value := d.valueInterface()
|
|
// special case on nil here. nil is hashable, so use it as key.
|
|
if key == nil {
|
|
theMap[key] = value
|
|
} else {
|
|
switch reflect.TypeOf(key).Kind() {
|
|
case reflect.Slice, reflect.Map: // bypass issues with unhashable types
|
|
theMap[&key] = value
|
|
default:
|
|
theMap[key] = value
|
|
}
|
|
}
|
|
}
|
|
return theMap
|
|
}
|
|
|
|
func (d *Decoder) set(v reflect.Value) {
|
|
// Check for unmarshaler.
|
|
u, pv := d.indirect(v, false)
|
|
if u != nil {
|
|
d.doUndo([]byte{'#', '{'}, tokenSetStart)
|
|
bs, err := d.nextValueBytes()
|
|
if err == nil {
|
|
err = u.UnmarshalEDN(bs)
|
|
}
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
return
|
|
}
|
|
v = pv
|
|
|
|
var setValue reflect.Value
|
|
var keyType reflect.Type
|
|
|
|
// Check type of target.
|
|
// TODO: accept option structs? -- i.e. structs where all fields are bools
|
|
// TODO: Also accept slices
|
|
switch v.Kind() {
|
|
case reflect.Map:
|
|
// map must have bool or struct{} value type
|
|
t := v.Type()
|
|
keyType = t.Key()
|
|
valKind := t.Elem().Kind()
|
|
switch valKind {
|
|
case reflect.Bool:
|
|
setValue = reflect.ValueOf(true)
|
|
case reflect.Struct:
|
|
// check if struct, and if so, ensure it has 0 fields
|
|
if t.Elem().NumField() != 0 {
|
|
d.error(&UnmarshalTypeError{"set", v.Type()})
|
|
}
|
|
setValue = reflect.Zero(t.Elem())
|
|
default:
|
|
d.error(&UnmarshalTypeError{"set", v.Type()})
|
|
}
|
|
if v.IsNil() {
|
|
v.Set(reflect.MakeMap(t))
|
|
}
|
|
case reflect.Slice, reflect.Array:
|
|
// Some extent of rechecking going on when we pass it to array, but it
|
|
// should be a constant factor only.
|
|
d.array(v, tokenSetEnd)
|
|
return
|
|
case reflect.Interface:
|
|
if v.NumMethod() == 0 {
|
|
// break out and use setInterface
|
|
v.Set(reflect.ValueOf(d.setInterface()))
|
|
return
|
|
} else {
|
|
d.error(&UnmarshalTypeError{"set", v.Type()})
|
|
}
|
|
|
|
default:
|
|
d.error(&UnmarshalTypeError{"set", v.Type()})
|
|
}
|
|
|
|
// special case here, to avoid panics when we have slices and maps as keys.
|
|
// Split out from code below to improve perf
|
|
if keyType.Kind() == reflect.Interface && keyType.NumMethod() == 0 {
|
|
for {
|
|
bs, tt, err := d.nextToken()
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
if tt == tokenSetEnd {
|
|
break
|
|
}
|
|
d.doUndo(bs, tt)
|
|
key := d.valueInterface()
|
|
// special case on nil here: Need to create a zero type of the specific
|
|
// keyType. As this is an interface, this will itself be nil.
|
|
if key == nil {
|
|
v.SetMapIndex(reflect.New(keyType).Elem(), setValue)
|
|
} else {
|
|
switch reflect.TypeOf(key).Kind() {
|
|
case reflect.Slice, reflect.Map: // bypass issues with unhashable types
|
|
v.SetMapIndex(reflect.ValueOf(&key), setValue)
|
|
default:
|
|
v.SetMapIndex(reflect.ValueOf(key), setValue)
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
for {
|
|
bs, tt, err := d.nextToken()
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
if tt == tokenSetEnd {
|
|
break
|
|
}
|
|
d.doUndo(bs, tt)
|
|
|
|
key := reflect.New(keyType).Elem()
|
|
d.value(key)
|
|
v.SetMapIndex(key, setValue)
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
func (d *Decoder) setInterface() interface{} {
|
|
theSet := make(map[interface{}]bool, 0)
|
|
for {
|
|
bs, tt, err := d.nextToken()
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
if tt == tokenSetEnd {
|
|
break
|
|
}
|
|
d.doUndo(bs, tt)
|
|
key := d.valueInterface()
|
|
if key == nil {
|
|
theSet[key] = true
|
|
} else {
|
|
switch reflect.TypeOf(key).Kind() {
|
|
case reflect.Slice, reflect.Map: // bypass issues with unhashable types
|
|
theSet[&key] = true
|
|
default:
|
|
theSet[key] = true
|
|
}
|
|
}
|
|
}
|
|
return theSet
|
|
}
|
|
|
|
// Byte forms of the symbols that have a special meaning when decoding: the nil
// literal and the two boolean literals.
var (
	nilByte   = []byte("nil")
	trueByte  = []byte("true")
	falseByte = []byte("false")
)
|
|
|
|
var symbolType = reflect.TypeOf(Symbol(""))
|
|
var keywordType = reflect.TypeOf(Keyword(""))
|
|
var byteSliceType = reflect.TypeOf([]byte(nil))
|
|
|
|
var bigFloatType = reflect.TypeOf((*big.Float)(nil)).Elem()
|
|
var bigIntType = reflect.TypeOf((*big.Int)(nil)).Elem()
|
|
|
|
func (d *Decoder) literal(bs []byte, ttype tokenType, v reflect.Value) {
|
|
wantptr := ttype == tokenSymbol && bytes.Equal(nilByte, bs)
|
|
u, pv := d.indirect(v, wantptr)
|
|
if u != nil {
|
|
err := u.UnmarshalEDN(bs)
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
return
|
|
}
|
|
v = pv
|
|
switch ttype {
|
|
case tokenSymbol:
|
|
if wantptr { // nil
|
|
switch v.Kind() {
|
|
case reflect.Interface, reflect.Ptr, reflect.Map, reflect.Slice:
|
|
v.Set(reflect.Zero(v.Type()))
|
|
default:
|
|
d.error(&UnmarshalTypeError{"nil", v.Type()})
|
|
}
|
|
} else if bytes.Equal(trueByte, bs) || bytes.Equal(falseByte, bs) { // true|false
|
|
value := bs[0] == 't'
|
|
switch v.Kind() {
|
|
default:
|
|
d.error(&UnmarshalTypeError{"bool", v.Type()})
|
|
case reflect.Bool:
|
|
v.SetBool(value)
|
|
case reflect.Interface:
|
|
if v.NumMethod() == 0 {
|
|
v.Set(reflect.ValueOf(value))
|
|
} else {
|
|
d.error(&UnmarshalTypeError{"bool", v.Type()})
|
|
}
|
|
}
|
|
} else if v.Kind() == reflect.String && v.Type() == symbolType { // "actual" symbols
|
|
v.SetString(string(bs))
|
|
} else if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
|
|
v.Set(reflect.ValueOf(Symbol(string(bs))))
|
|
} else {
|
|
d.error(&UnmarshalTypeError{"symbol", v.Type()})
|
|
}
|
|
case tokenKeyword:
|
|
if v.Kind() == reflect.String && v.Type() == keywordType { // "actual" keywords
|
|
v.SetString(string(bs[1:]))
|
|
} else if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
|
|
v.Set(reflect.ValueOf(Keyword(string(bs[1:]))))
|
|
} else {
|
|
d.error(&UnmarshalTypeError{"keyword", v.Type()})
|
|
}
|
|
case tokenInt:
|
|
var s string
|
|
isBig := false
|
|
if bs[len(bs)-1] == 'N' { // can end with N, which we promptly ignore
|
|
// TODO: If the user expects a float and receives what is perceived as an
|
|
// int (ends with N), what is the sensible thing to do?
|
|
s = string(bs[:len(bs)-1])
|
|
isBig = true
|
|
} else {
|
|
s = string(bs)
|
|
}
|
|
switch v.Kind() {
|
|
default:
|
|
switch v.Type() {
|
|
case bigIntType:
|
|
bi := v.Addr().Interface().(*big.Int)
|
|
_, ok := bi.SetString(s, 10)
|
|
if !ok {
|
|
d.error(errInternal)
|
|
}
|
|
case bigFloatType:
|
|
mc := d.mathContext()
|
|
bf := v.Addr().Interface().(*big.Float)
|
|
bf = bf.SetPrec(mc.Precision).SetMode(mc.Mode)
|
|
_, _, err := bf.Parse(s, 10)
|
|
if err != nil { // grumble grumble
|
|
d.error(errInternal)
|
|
}
|
|
default:
|
|
d.error(&UnmarshalTypeError{"int", v.Type()})
|
|
}
|
|
case reflect.Interface:
|
|
if !isBig {
|
|
n, err := strconv.ParseInt(s, 10, 64)
|
|
if err != nil {
|
|
d.error(&UnmarshalTypeError{"int " + s, reflect.TypeOf(int64(0))})
|
|
}
|
|
if v.NumMethod() != 0 {
|
|
d.error(&UnmarshalTypeError{"int", v.Type()})
|
|
}
|
|
v.Set(reflect.ValueOf(n))
|
|
} else {
|
|
bi := new(big.Int)
|
|
_, ok := bi.SetString(s, 10)
|
|
if !ok {
|
|
d.error(errInternal)
|
|
}
|
|
v.Set(reflect.ValueOf(bi))
|
|
}
|
|
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
|
n, err := strconv.ParseInt(s, 10, 64)
|
|
if err != nil || v.OverflowInt(n) {
|
|
d.error(&UnmarshalTypeError{"int " + s, v.Type()})
|
|
}
|
|
v.SetInt(n)
|
|
|
|
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
|
|
n, err := strconv.ParseUint(s, 10, 64)
|
|
if err != nil || v.OverflowUint(n) {
|
|
d.error(&UnmarshalTypeError{"int " + s, v.Type()})
|
|
}
|
|
v.SetUint(n)
|
|
|
|
case reflect.Float32, reflect.Float64:
|
|
n, err := strconv.ParseFloat(s, v.Type().Bits())
|
|
if err != nil || v.OverflowFloat(n) {
|
|
d.error(&UnmarshalTypeError{"int " + s, v.Type()})
|
|
}
|
|
v.SetFloat(n)
|
|
}
|
|
|
|
case tokenFloat:
|
|
var s string
|
|
isBig := false
|
|
if bs[len(bs)-1] == 'M' { // can end with M, which we promptly ignore
|
|
s = string(bs[:len(bs)-1])
|
|
isBig = true
|
|
} else {
|
|
s = string(bs)
|
|
}
|
|
switch v.Kind() {
|
|
default:
|
|
switch v.Type() {
|
|
case bigFloatType:
|
|
mc := d.mathContext()
|
|
bf := v.Addr().Interface().(*big.Float)
|
|
bf = bf.SetPrec(mc.Precision).SetMode(mc.Mode)
|
|
_, _, err := bf.Parse(s, 10)
|
|
if err != nil { // grumble grumble
|
|
d.error(errInternal)
|
|
}
|
|
default:
|
|
d.error(&UnmarshalTypeError{"float", v.Type()})
|
|
}
|
|
case reflect.Interface:
|
|
if !isBig {
|
|
n, err := strconv.ParseFloat(s, 64)
|
|
if err != nil {
|
|
d.error(&UnmarshalTypeError{"float " + s, reflect.TypeOf(float64(0))})
|
|
}
|
|
if v.NumMethod() != 0 {
|
|
d.error(&UnmarshalTypeError{"float", v.Type()})
|
|
}
|
|
v.Set(reflect.ValueOf(n))
|
|
} else {
|
|
mc := d.mathContext()
|
|
bf := new(big.Float).SetPrec(mc.Precision).SetMode(mc.Mode)
|
|
_, _, err := bf.Parse(s, 10)
|
|
if err != nil { // grumble grumble
|
|
d.error(errInternal)
|
|
}
|
|
v.Set(reflect.ValueOf(bf))
|
|
}
|
|
case reflect.Float32, reflect.Float64:
|
|
n, err := strconv.ParseFloat(s, v.Type().Bits())
|
|
if err != nil || v.OverflowFloat(n) {
|
|
d.error(&UnmarshalTypeError{"float " + s, v.Type()})
|
|
}
|
|
v.SetFloat(n)
|
|
}
|
|
case tokenChar:
|
|
r, err := toRune(bs)
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
switch v.Kind() {
|
|
default:
|
|
d.error(&UnmarshalTypeError{"rune", v.Type()})
|
|
case reflect.Interface:
|
|
if v.NumMethod() != 0 {
|
|
d.error(&UnmarshalTypeError{"rune", v.Type()})
|
|
}
|
|
v.Set(reflect.ValueOf(r))
|
|
case reflect.Int32: // rune is an alias for int32
|
|
v.SetInt(int64(r))
|
|
}
|
|
case tokenString:
|
|
s, ok := unquoteBytes(bs)
|
|
if !ok {
|
|
d.error(errInternal)
|
|
}
|
|
switch v.Kind() {
|
|
default:
|
|
d.error(&UnmarshalTypeError{"string", v.Type()})
|
|
case reflect.String:
|
|
v.SetString(string(s))
|
|
case reflect.Interface:
|
|
if v.NumMethod() == 0 {
|
|
v.Set(reflect.ValueOf(string(s)))
|
|
} else {
|
|
d.error(&UnmarshalTypeError{"string", v.Type()})
|
|
}
|
|
}
|
|
default:
|
|
d.error(errInternal)
|
|
}
|
|
}
|
|
|
|
func (d *Decoder) literalInterface(bs []byte, ttype tokenType) interface{} {
|
|
switch ttype {
|
|
case tokenSymbol:
|
|
if bytes.Equal(nilByte, bs) {
|
|
return nil
|
|
}
|
|
if bytes.Equal(trueByte, bs) {
|
|
return true
|
|
}
|
|
if bytes.Equal(falseByte, bs) {
|
|
return false
|
|
}
|
|
return Symbol(string(bs))
|
|
case tokenKeyword:
|
|
return Keyword(string(bs[1:]))
|
|
case tokenInt:
|
|
if bs[len(bs)-1] == 'N' { // can end with N
|
|
var bi big.Int
|
|
s := string(bs[:len(bs)-1])
|
|
_, ok := bi.SetString(s, 10)
|
|
if !ok {
|
|
d.error(errInternal)
|
|
}
|
|
return bi
|
|
} else {
|
|
s := string(bs)
|
|
n, err := strconv.ParseInt(s, 10, 64)
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
return n
|
|
}
|
|
case tokenFloat:
|
|
var s string
|
|
if bs[len(bs)-1] == 'M' { // can end with M, which we promptly ignore
|
|
s = string(bs[:len(bs)-1])
|
|
} else {
|
|
s = string(bs)
|
|
}
|
|
n, err := strconv.ParseFloat(s, 64)
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
return n
|
|
case tokenChar:
|
|
r, err := toRune(bs)
|
|
if err != nil {
|
|
d.error(err)
|
|
}
|
|
return r
|
|
case tokenString:
|
|
t, ok := unquote(bs)
|
|
if !ok {
|
|
d.error(errInternal)
|
|
}
|
|
return t
|
|
default:
|
|
d.error(errInternal)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// Byte representations of the named character literals that toRune maps to
// their control or whitespace runes.
var (
	newlineBytes  = []byte("\\newline")
	returnBytes   = []byte("\\return")
	spaceBytes    = []byte("\\space")
	tabBytes      = []byte("\\tab")
	formfeedBytes = []byte("\\formfeed")
)
|
|
|
|
func toRune(bs []byte) (rune, error) {
|
|
// handle special cases first:
|
|
switch {
|
|
case bytes.Equal(bs, newlineBytes):
|
|
return '\n', nil
|
|
case bytes.Equal(bs, returnBytes):
|
|
return '\r', nil
|
|
case bytes.Equal(bs, spaceBytes):
|
|
return ' ', nil
|
|
case bytes.Equal(bs, tabBytes):
|
|
return '\t', nil
|
|
case bytes.Equal(bs, formfeedBytes):
|
|
return '\f', nil
|
|
case len(bs) == 6 && bs[1] == 'u': // I don't think unicode chars could be 5 bytes long?
|
|
return getu4(bs), nil
|
|
default:
|
|
r, size := utf8.DecodeRune(bs[1:])
|
|
if r == utf8.RuneError && size == 1 {
|
|
return r, errIllegalRune
|
|
}
|
|
return r, nil
|
|
}
|
|
}
|
|
|
|
// nextToken handles #_
|
|
func (d *Decoder) nextToken() ([]byte, tokenType, error) {
|
|
bs, tt, err := d.rawToken()
|
|
if err != nil {
|
|
return bs, tt, err
|
|
}
|
|
switch tt {
|
|
case tokenDiscard:
|
|
err := d.traverseValue()
|
|
if err != nil {
|
|
return nil, tokenError, err
|
|
}
|
|
return d.nextToken() // again for discards
|
|
default:
|
|
return bs, tt, err
|
|
}
|
|
}
|
|
|
|
func (d *Decoder) rawToken() ([]byte, tokenType, error) {
|
|
if d.undo {
|
|
d.undo = false
|
|
b := d.prevSlice
|
|
tt := d.prevTtype
|
|
d.prevSlice = nil
|
|
d.prevTtype = tokenError
|
|
return b, tt, nil
|
|
}
|
|
var val bytes.Buffer
|
|
d.lex.reset()
|
|
doIgnore := true
|
|
if d.hasLeftover {
|
|
d.hasLeftover = false
|
|
d.lex.position++
|
|
switch d.lex.state(d.leftover) {
|
|
case lexCont:
|
|
val.WriteRune(d.leftover)
|
|
doIgnore = false
|
|
case lexEnd:
|
|
val.WriteRune(d.leftover)
|
|
return val.Bytes(), d.lex.token, nil
|
|
case lexEndPrev:
|
|
return nil, tokenError, errInternal
|
|
case lexError:
|
|
return nil, tokenError, d.lex.err
|
|
case lexIgnore:
|
|
// just ignore
|
|
}
|
|
}
|
|
if doIgnore { // ignore whitespace
|
|
readWhitespace:
|
|
for {
|
|
r, _, err := d.rd.ReadRune()
|
|
if err == io.EOF {
|
|
return nil, tokenError, errNoneLeft
|
|
}
|
|
if err != nil {
|
|
return nil, tokenError, err
|
|
}
|
|
d.lex.position++
|
|
switch d.lex.state(r) {
|
|
case lexCont: // got a value, so continue on past doIgnoring
|
|
// TODO: This returns an error. Will it happen in practice? Probably?
|
|
val.WriteRune(r)
|
|
break readWhitespace
|
|
case lexError:
|
|
return nil, tokenError, d.lex.err
|
|
case lexEnd:
|
|
val.WriteRune(r)
|
|
return val.Bytes(), d.lex.token, nil
|
|
case lexEndPrev:
|
|
return nil, tokenError, errInternal
|
|
case lexIgnore:
|
|
// keep on reading
|
|
}
|
|
}
|
|
}
|
|
for {
|
|
r, _, err := d.rd.ReadRune()
|
|
var ls lexState
|
|
// this is not exactly perfect.
|
|
switch {
|
|
case err == io.EOF:
|
|
ls = d.lex.eof()
|
|
case err != nil:
|
|
return nil, tokenError, err
|
|
default:
|
|
d.lex.position++
|
|
ls = d.lex.state(r)
|
|
}
|
|
switch ls {
|
|
case lexCont:
|
|
val.WriteRune(r)
|
|
case lexIgnore:
|
|
if err != io.EOF {
|
|
return nil, tokenError, errInternal
|
|
} else {
|
|
return nil, tokenError, errNoneLeft
|
|
}
|
|
case lexEnd:
|
|
if err != io.EOF {
|
|
val.WriteRune(r)
|
|
}
|
|
return val.Bytes(), d.lex.token, nil
|
|
case lexEndPrev:
|
|
d.hasLeftover = true
|
|
d.leftover = r
|
|
return val.Bytes(), d.lex.token, nil
|
|
case lexError:
|
|
return nil, tokenError, d.lex.err
|
|
}
|
|
}
|
|
}
|
|
|
|
// traverseValue reads a single value and skips it -- whether it is a list, map
|
|
// or a literal. Doesn't validate its state. skips over discard tokens as well.
|
|
func (d *Decoder) traverseValue() error {
|
|
tstack := newTokenStack()
|
|
for {
|
|
_, tt, err := d.nextToken()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = tstack.push(tt)
|
|
if err != nil || tstack.done() {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
type tokenStackElem struct {
|
|
tt tokenType
|
|
count int
|
|
}
|
|
|
|
type tokenStack struct {
|
|
toks []tokenStackElem
|
|
toplevel tokenType
|
|
}
|
|
|
|
func newTokenStack() *tokenStack {
|
|
return &tokenStack{
|
|
toks: nil,
|
|
toplevel: tokenError,
|
|
}
|
|
}
|
|
|
|
func (t *tokenStack) done() bool {
|
|
return len(t.toks) == 0 && t.toplevel != tokenDiscard
|
|
}
|
|
|
|
func (t *tokenStack) peek() tokenType {
|
|
return t.toks[len(t.toks)-1].tt
|
|
}
|
|
|
|
func (t *tokenStack) peekCount() int {
|
|
return t.toks[len(t.toks)-1].count
|
|
}
|
|
|
|
func (t *tokenStack) pop() {
|
|
t.toks = t.toks[:len(t.toks)-1]
|
|
}
|
|
|
|
func (t *tokenStack) push(tt tokenType) error {
|
|
// retain toplevel value for done check
|
|
if len(t.toks) == 0 {
|
|
t.toplevel = tt
|
|
}
|
|
switch tt {
|
|
case tokenMapStart, tokenVectorStart, tokenListStart, tokenSetStart, tokenDiscard, tokenTag:
|
|
// append to toks, regardless
|
|
t.toks = append(t.toks, tokenStackElem{tt, 0})
|
|
return nil
|
|
case tokenMapEnd:
|
|
if len(t.toks) == 0 || (t.peek() != tokenMapStart && t.peek() != tokenSetStart) {
|
|
return errUnexpected
|
|
}
|
|
t.pop()
|
|
case tokenListEnd:
|
|
if len(t.toks) == 0 || t.peek() != tokenListStart {
|
|
return errUnexpected
|
|
}
|
|
t.pop()
|
|
case tokenVectorEnd:
|
|
if len(t.toks) == 0 || t.peek() != tokenVectorStart {
|
|
return errUnexpected
|
|
}
|
|
t.pop()
|
|
default:
|
|
}
|
|
if len(t.toks) > 0 {
|
|
t.toks[len(t.toks)-1].count++
|
|
}
|
|
// popping of discards and tags
|
|
for len(t.toks) > 0 && t.peek() == tokenTag {
|
|
t.pop()
|
|
if len(t.toks) > 0 {
|
|
t.toks[len(t.toks)-1].count++
|
|
}
|
|
}
|
|
if len(t.toks) > 0 && t.peek() == tokenDiscard {
|
|
t.pop()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// more removes whitespace and discards, and returns nil if there is more data.
|
|
// If the end of the stream is found, io.EOF is sent back. If an error happens
|
|
// while parsing a discard value, it is passed up.
|
|
func (d *Decoder) more() error {
|
|
if d.undo {
|
|
return nil
|
|
}
|
|
if d.hasLeftover && d.leftover == '#' {
|
|
// check if next rune is '_'
|
|
r, _, err := d.rd.ReadRune()
|
|
if err == io.EOF {
|
|
return errNoneLeft
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if r != '_' {
|
|
// it's not discard, so let's just unread the rune
|
|
return d.rd.UnreadRune()
|
|
}
|
|
// need to consume a value
|
|
d.hasLeftover = false
|
|
d.leftover = '\uFFFD'
|
|
d.lex.position += 2
|
|
err = d.traverseValue()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return d.more()
|
|
}
|
|
if d.hasLeftover && !isWhitespace(d.leftover) && d.leftover != ';' {
|
|
return nil
|
|
}
|
|
|
|
// If we've come to this step, we need to read whitespace and -- if we find
|
|
// something suspicious, we need to check if it can be assumed to be
|
|
// whitespace.
|
|
d.lex.reset()
|
|
for {
|
|
var r rune
|
|
var err error
|
|
readWhitespace:
|
|
for {
|
|
r, _, err = d.rd.ReadRune()
|
|
if err != nil {
|
|
return err
|
|
// if we hit the end of the line, then we don't have more and we return
|
|
// io.EOF
|
|
}
|
|
d.lex.position++
|
|
switch d.lex.state(r) {
|
|
case lexCont: // found something that looks like a value, so break out of whitespace loop
|
|
break readWhitespace
|
|
case lexError:
|
|
return d.lex.err
|
|
case lexEnd: // found a delimiter of some sort, so store it as leftover and return nil
|
|
d.hasLeftover = true
|
|
d.leftover = r
|
|
d.lex.position--
|
|
return nil
|
|
case lexEndPrev:
|
|
return errInternal
|
|
case lexIgnore:
|
|
// keep on readin'
|
|
}
|
|
}
|
|
|
|
if r == '#' { // the edge case again, so let's gobble
|
|
// check if next rune is '_'
|
|
r, _, err := d.rd.ReadRune()
|
|
if err == io.EOF {
|
|
return errNoneLeft
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if r != '_' {
|
|
// it's not discard, so we unread the rune and put # as leftover
|
|
d.leftover = '#'
|
|
d.hasLeftover = true
|
|
d.lex.position--
|
|
return d.rd.UnreadRune()
|
|
}
|
|
// need to consume a value
|
|
d.hasLeftover = false
|
|
d.leftover = '\uFFFD'
|
|
d.lex.position += 2
|
|
err = d.traverseValue()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return d.more()
|
|
} else { // we could do unreadrune here too, would've been just as fine
|
|
d.hasLeftover = true
|
|
d.leftover = r
|
|
d.lex.position--
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// Oh, asking about why this is so similar to the part above, eh? Yes, I would
|
|
// also consider this a crime. At least I use the same lexer. This is probably
|
|
// next on the list when I have people complaining about perf issues.
|
|
func (d *Decoder) nextValueBytes() ([]byte, error) {
|
|
// TODO: Ensure values inside maps come in pairs.
|
|
tstack := newTokenStack()
|
|
var val bytes.Buffer
|
|
if d.undo {
|
|
d.undo = false
|
|
b := d.prevSlice
|
|
tt := d.prevTtype
|
|
d.prevSlice = nil
|
|
d.prevTtype = tokenError
|
|
if tt == tokenDiscard { // should be impossible to get a tokenDiscard here?
|
|
return nil, errInternal
|
|
}
|
|
err := tstack.push(tt)
|
|
if err != nil || tstack.done() {
|
|
return val.Bytes(), err
|
|
}
|
|
val.Write(b)
|
|
}
|
|
readElems:
|
|
for {
|
|
d.lex.reset()
|
|
// Can't ignore whitespace in general. So I guess we just add it onto the buffer
|
|
readWs := true
|
|
if d.hasLeftover {
|
|
// we can have leftover from previous iteration. e.g. "foo[bar]" will have
|
|
// leftover "[" and "]"
|
|
d.hasLeftover = false
|
|
d.lex.position++
|
|
val.WriteRune(d.leftover)
|
|
switch d.lex.state(d.leftover) {
|
|
case lexCont:
|
|
readWs = false
|
|
case lexEnd:
|
|
err := tstack.push(d.lex.token)
|
|
if err != nil || tstack.done() {
|
|
return val.Bytes(), err
|
|
}
|
|
d.lex.reset()
|
|
case lexEndPrev:
|
|
return nil, errInternal
|
|
case lexError:
|
|
return nil, d.lex.err
|
|
case lexIgnore:
|
|
// just keep going
|
|
}
|
|
}
|
|
if readWs {
|
|
readWhitespace:
|
|
// If we end up here, it means we expect at least one more token
|
|
for {
|
|
r, _, err := d.rd.ReadRune()
|
|
if err == io.EOF {
|
|
return nil, errNoneLeft
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
d.lex.position++
|
|
val.WriteRune(r)
|
|
switch d.lex.state(r) {
|
|
case lexCont: // found something that looks like a value, so break out of whitespace loop
|
|
break readWhitespace
|
|
case lexError:
|
|
return nil, d.lex.err
|
|
case lexEnd:
|
|
err := tstack.push(d.lex.token)
|
|
if err != nil || tstack.done() {
|
|
return val.Bytes(), err
|
|
}
|
|
// Here we'd usually continue on next iteration loop (which is safe
|
|
// and valid), but since we know we don't have any leftovers, we can
|
|
// just reset the lexer and keep attempting to read whitespace.
|
|
d.lex.reset()
|
|
case lexEndPrev:
|
|
return nil, errInternal
|
|
case lexIgnore:
|
|
// keep on readin'
|
|
}
|
|
}
|
|
}
|
|
// read element
|
|
for {
|
|
r, rlength, err := d.rd.ReadRune()
|
|
var ls lexState
|
|
// ugh, this is not exactly perfect.
|
|
switch {
|
|
case err == io.EOF:
|
|
ls = d.lex.eof()
|
|
case err != nil:
|
|
return nil, err
|
|
default:
|
|
d.lex.position++
|
|
val.WriteRune(r)
|
|
ls = d.lex.state(r)
|
|
}
|
|
switch ls {
|
|
case lexCont:
|
|
// keep going
|
|
case lexIgnore:
|
|
if err != io.EOF {
|
|
return nil, errInternal
|
|
} else {
|
|
return nil, errNoneLeft
|
|
}
|
|
case lexEnd:
|
|
ioErr := err
|
|
err := tstack.push(d.lex.token)
|
|
if err != nil || tstack.done() {
|
|
return val.Bytes(), err
|
|
}
|
|
if ioErr == io.EOF /* && !tstack.done() */ {
|
|
return nil, errNoneLeft
|
|
}
|
|
continue readElems
|
|
case lexEndPrev: // if err == io.EOF then we cannot end up here. (Invariant forced by lexer)
|
|
val.Truncate(val.Len() - rlength)
|
|
d.hasLeftover = true
|
|
d.leftover = r
|
|
|
|
err := tstack.push(d.lex.token)
|
|
if err != nil || tstack.done() {
|
|
return val.Bytes(), err
|
|
}
|
|
continue readElems
|
|
case lexError:
|
|
return nil, d.lex.err
|
|
}
|
|
}
|
|
}
|
|
}
|