2022-03-02 17:46:16 -04:00

1679 lines
41 KiB
Go

// Copyright 2015 Jean Niklas L'orange. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package edn
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"math/big"
"reflect"
"runtime"
"strconv"
"strings"
"unicode/utf8"
)
// Sentinel errors raised by the decoder while tokenizing and parsing.
var (
// errInternal is raised when the parser reaches a state it does not expect,
// e.g. a second undo before the first one was consumed.
errInternal = errors.New("Illegal internal parse state")
// errNoneLeft is returned when the input ends before another token starts.
errNoneLeft = errors.New("No more tokens to read")
// errUnexpected is raised when a token is not valid at its position.
errUnexpected = errors.New("Unexpected token")
// errIllegalRune is returned by toRune for a character literal that is not
// valid UTF-8.
errIllegalRune = errors.New("Illegal rune form")
)
// UnknownTagError is raised when a tagged element is decoded into a typed
// (non-interface) destination and no tag function or tag struct is registered
// for the tag.
type UnknownTagError struct {
	tag    []byte       // the tag token as read from the input
	value  []byte       // raw EDN of the tagged value
	inType reflect.Type // type of the Go destination
}

// Error implements the error interface. The tag and the raw value are printed
// back to back, followed by the destination type.
func (ute UnknownTagError) Error() string {
	const layout = "Unable to decode %s%s into %s"
	return fmt.Sprintf(layout, ute.tag, ute.value, ute.inType)
}
// Unmarshal parses the EDN-encoded data and stores the result in the value
// pointed to by v. v must be a non-nil pointer; otherwise an
// InvalidUnmarshalError is returned.
//
// Unmarshal uses the inverse of the encodings that Marshal uses, allocating
// maps, slices, and pointers as necessary, with the following additional rules:
//
// First, if the value to store the result into implements edn.Unmarshaler, it
// is called with the raw EDN of the value.
//
// If the value is tagged and the tag is known, the EDN value is decoded into
// the input type of the tag convert function. If no error happens during
// converting, the result of the conversion is then assigned to v if possible.
//
// To unmarshal EDN into a pointer, Unmarshal first handles the case of the EDN
// being the EDN literal nil. In that case, Unmarshal sets the pointer to nil.
// Otherwise, Unmarshal unmarshals the EDN into the value pointed at by the
// pointer. If the pointer is nil, Unmarshal allocates a new value for it to
// point to.
//
// To unmarshal EDN into a struct, Unmarshal matches incoming map keys
// (symbols, keywords or strings) to the keys used by Marshal (either the
// struct field name or its tag), preferring an exact match but also accepting
// a case-insensitive match. Keys of any other kind are skipped together with
// their values. Keys that match no field are ignored unless
// DisallowUnknownFields has been called on the decoder.
//
// To unmarshal EDN into an interface value,
// Unmarshal stores one of these in the interface value:
//
//	bool, for EDN booleans
//	float64, for EDN floats
//	int64, for EDN integers
//	int32, for EDN characters
//	string, for EDN strings
//	edn.Symbol, for EDN symbols
//	edn.Keyword, for EDN keywords
//	[]interface{}, for EDN vectors and lists
//	map[interface{}]interface{}, for EDN maps
//	map[interface{}]bool, for EDN sets
//	nil for EDN nil
//	edn.Tag for unknown EDN tagged elements
//	T for known EDN tagged elements, where T is the result of the converter function
//
// To unmarshal an EDN vector/list into a slice, Unmarshal writes the elements
// from index 0, growing the slice as needed, and truncates it to the number
// of elements read. An empty vector/list yields an empty, non-nil slice.
//
// To unmarshal an EDN map into a Go map, Unmarshal allocates a map if the Go
// map is nil and then adds the key-value pairs from the EDN map to it.
//
// If an EDN value is not appropriate for a given target type, or if an EDN
// number overflows the target type, decoding stops and Unmarshal returns an
// UnmarshalTypeError describing the problem.
//
// The EDN nil value unmarshals into an interface, map, pointer, or slice by
// setting that Go value to nil.
//
// When unmarshaling strings, invalid UTF-8 or invalid UTF-16 surrogate pairs
// are not treated as an error. Instead, they are replaced by the Unicode
// replacement character U+FFFD.
func Unmarshal(data []byte, v interface{}) error {
	dec := newDecoder(bufio.NewReader(bytes.NewBuffer(data)))
	return dec.Decode(v)
}
// UnmarshalString behaves like Unmarshal, except that the EDN input is given
// as a string rather than a byte slice.
func UnmarshalString(data string, v interface{}) error {
	dec := newDecoder(bufio.NewReader(bytes.NewBufferString(data)))
	return dec.Decode(v)
}
// NewDecoder creates a Decoder that reads EDN values from r.
//
// The decoder wraps r in its own bufio.Reader, so it may consume data from r
// beyond the EDN values requested.
func NewDecoder(r io.Reader) *Decoder {
	buffered := bufio.NewReader(r)
	return newDecoder(buffered)
}
// Buffered returns a reader of the data remaining in the Decoder's buffer. The
// reader is valid until the next call to Decode.
//
// The returned value is the decoder's own bufio.Reader, not a copy, so reading
// from it advances the decoder's input. A single rune of lookahead that the
// decoder has already taken from the reader (see Decoder.leftover) is not
// available through it.
func (d *Decoder) Buffered() *bufio.Reader {
return d.rd
}
// AddTagFn adds a tag function to the decoder's TagMap. Note that TagMaps are
// mutable: If Decoder A and B share TagMap, then adding a tag function to one
// may modify both.
//
// The call is forwarded to TagMap.AddTagFn and its error is returned as is.
func (d *Decoder) AddTagFn(tagname string, fn interface{}) error {
return d.tagmap.AddTagFn(tagname, fn)
}
// MustAddTagFn adds a tag function to the decoder's TagMap like AddTagFn,
// except that it panics if the tag could not be added. The call is forwarded
// to TagMap.MustAddTagFn.
func (d *Decoder) MustAddTagFn(tagname string, fn interface{}) {
d.tagmap.MustAddTagFn(tagname, fn)
}
// AddTagStruct adds a tag struct to the decoder's TagMap. Note that TagMaps are
// mutable: If Decoder A and B share TagMap, then adding a tag struct to one
// may modify both.
//
// The call is forwarded to TagMap.AddTagStruct and its error is returned as is.
func (d *Decoder) AddTagStruct(tagname string, example interface{}) error {
return d.tagmap.AddTagStruct(tagname, example)
}
// UseTagMap sets the TagMap provided as the TagMap for this decoder, replacing
// the one the decoder used before. The pointer is stored as is, so later
// changes to tm are visible to the decoder.
func (d *Decoder) UseTagMap(tm *TagMap) {
d.tagmap = tm
}
// UseMathContext sets the given math context as default math context for this
// decoder. The decoder keeps a pointer to its own copy of mc (the parameter is
// passed by value), so later changes to the caller's value have no effect.
func (d *Decoder) UseMathContext(mc MathContext) {
d.mc = &mc
}
// mathContext returns the math context to use for big.Float values: the one
// installed with UseMathContext, or GlobalMathContext if none was installed.
func (d *Decoder) mathContext() *MathContext {
	if d.mc == nil {
		return &GlobalMathContext
	}
	return d.mc
}
// DisallowUnknownFields causes the Decoder to return an error when the
// destination is a struct and the input contains keys which do not match any
// non-ignored, exported fields in the destination. The error reported is an
// *UnknownFieldError. There is no way to turn the setting off again on the
// same decoder.
func (d *Decoder) DisallowUnknownFields() {
d.disallowUnknownFields = true
}
// Unmarshaler is the interface implemented by objects that can unmarshal an EDN
// description of themselves. The input can be assumed to be a valid encoding of
// an EDN value. UnmarshalEDN must copy the EDN data if it wishes to retain the
// data after returning.
//
// A non-nil error returned from UnmarshalEDN aborts decoding and is returned
// from Decode.
type Unmarshaler interface {
UnmarshalEDN([]byte) error
}
// parseState names the syntactic contexts a parser can be in.
type parseState int
// NOTE(review): the first constant has no explicit type, so all of these are
// untyped integer constants rather than parseState values. Nothing in the
// visible part of the file uses them; confirm before giving them the
// parseState type.
const (
parseToplevel = iota
parseList
parseVector
parseMap
parseSet
parseTagged
parseDiscard
)
// A Decoder reads and decodes EDN objects from an input stream.
type Decoder struct {
// disallowUnknownFields makes struct decoding fail on keys without a
// matching field; set by DisallowUnknownFields.
disallowUnknownFields bool
// lex is the lexer state machine fed one rune at a time by rawToken.
lex *lexer
savedError error
// rd is the buffered input all runes are read from.
rd *bufio.Reader
// tagmap holds the decoder-local tag functions and structs; it is consulted
// before the global tag table.
tagmap *TagMap
// mc is the decoder-specific math context, or nil to use GlobalMathContext.
mc *MathContext
// parser-specific
// prevSlice and prevTtype hold the token pushed back by doUndo; they are
// only meaningful while undo is true.
prevSlice []byte
prevTtype tokenType
undo bool
// if nextToken returned lexEndPrev, we must write the leftover value at
// next call to nextToken
hasLeftover bool
leftover rune
}
// An InvalidUnmarshalError describes an invalid argument passed to Unmarshal.
// (The argument to Unmarshal must be a non-nil pointer.)
type InvalidUnmarshalError struct {
	Type reflect.Type // type of the rejected argument; nil for an untyped nil
}

// Error implements the error interface. The message distinguishes between an
// untyped nil argument, a non-pointer argument and a nil pointer argument.
func (e *InvalidUnmarshalError) Error() string {
	if e.Type == nil {
		return "edn: Unmarshal(nil)"
	}
	if e.Type.Kind() != reflect.Ptr {
		// The prefix used to be misspelled "edb: ", unlike every other error
		// message of this package.
		return "edn: Unmarshal(non-pointer " + e.Type.String() + ")"
	}
	return "edn: Unmarshal(nil " + e.Type.String() + ")"
}
// An UnmarshalTypeError reports that an EDN value could not be stored in a Go
// value of a particular type.
type UnmarshalTypeError struct {
	Value string       // description of EDN value - "bool", "array", "number -5"
	Type  reflect.Type // type of Go value it could not be assigned to
}

// Error implements the error interface.
func (e *UnmarshalTypeError) Error() string {
	goType := e.Type.String()
	return "edn: cannot unmarshal " + e.Value + " into Go value of type " + goType
}
// UnhashableError is reported when the decoder tried to use an unhashable
// value as a key in a map or set. Position is the lexer position close to
// where the offending value was found, to help debugging.
type UnhashableError struct {
	Position int64
}

// Error implements the error interface.
func (e *UnhashableError) Error() string {
	pos := strconv.FormatInt(e.Position, 10)
	return "edn: unhashable type at position " + pos + " in input"
}
// UnknownFieldError is reported when DisallowUnknownFields is in effect and a
// map key in the input matches no field of the destination struct.
type UnknownFieldError struct {
	Field string       // the field name
	Type  reflect.Type // type of Go struct with a missing field
}

// Error implements the error interface.
func (e *UnknownFieldError) Error() string {
	structType := e.Type.String()
	return "edn: cannot find a field '" + e.Field + "' in a struct " + structType + " to unmarshal into"
}
// Decode reads the next EDN-encoded value from its input and stores it in the
// value pointed to by v.
//
// See the documentation for Unmarshal for details about the conversion of EDN
// into a Go value.
//
// val must be a non-nil pointer, otherwise an *InvalidUnmarshalError is
// returned. Errors raised inside the decoder travel as panics and are turned
// back into the returned error here. A runtime panic caused by an unhashable
// map or set key is reported as an *UnhashableError. Every other panic,
// including one whose value is not an error, is propagated unchanged.
func (d *Decoder) Decode(val interface{}) (err error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		if rerr, ok := r.(runtime.Error); ok {
			// Only the "unhashable key" runtime error is expected here; any
			// other runtime error is a genuine bug and must not be swallowed.
			if !strings.Contains(rerr.Error(), "unhashable") {
				panic(r)
			}
			err = &UnhashableError{Position: d.lex.position}
			return
		}
		perr, ok := r.(error)
		if !ok {
			// Not raised through d.error (e.g. a string panic from reflect or
			// from a user-supplied tag function). Re-panic with the original
			// value rather than hiding it behind a type-assertion failure.
			panic(r)
		}
		err = perr
	}()

	err = d.more()
	if err != nil {
		return err
	}

	rv := reflect.ValueOf(val)
	if rv.Kind() != reflect.Ptr || rv.IsNil() {
		return &InvalidUnmarshalError{reflect.TypeOf(val)}
	}

	d.value(rv)
	return nil
}
// newDecoder builds a Decoder that reads from buf, with a freshly reset lexer,
// an empty decoder-local TagMap and no pending leftover rune.
func newDecoder(buf *bufio.Reader) *Decoder {
	lex := new(lexer)
	lex.reset()

	dec := &Decoder{lex: lex, rd: buf, tagmap: new(TagMap)}
	dec.hasLeftover = false
	dec.leftover = '\uFFFD'
	return dec
}
// getTagFn looks up the value registered for tagname, first in the decoder's
// own TagMap and then in the global tag table. It returns nil when the tag is
// registered in neither. Each table is only read while its read lock is held.
func (d *Decoder) getTagFn(tagname string) *reflect.Value {
	d.tagmap.RLock()
	local, found := d.tagmap.m[tagname]
	d.tagmap.RUnlock()
	if found {
		return &local
	}

	globalTags.RLock()
	global, found := globalTags.m[tagname]
	globalTags.RUnlock()
	if !found {
		return nil
	}
	return &global
}
// error aborts decoding by panicking with err. The panic is recovered in
// Decode, which returns err to the caller. Code following a call to error is
// therefore never executed.
func (d *Decoder) error(err error) {
panic(err)
}
// doUndo pushes a token back so that the next call to rawToken returns it
// again. Only one token can be pending at a time; a second push before the
// first was consumed raises errInternal.
func (d *Decoder) doUndo(bs []byte, ttype tokenType) {
	if d.undo {
		// this is LL(1), so this shouldn't happen
		d.error(errInternal)
	}
	d.prevSlice, d.prevTtype = bs, ttype
	d.undo = true
}
// array decodes the elements of an EDN list, vector or set into v, reading
// tokens until endType is found. The opening token of the collection has
// already been consumed by the caller.
//
// If v resolves to an Unmarshaler, the opening token is pushed back (chosen
// from endType) and the raw bytes of the whole collection are handed to it.
// An empty interface receives a []interface{}. A slice is grown as needed and
// ends up with exactly as many elements as were read (an empty, non-nil slice
// if there were none). A fixed-size array is filled from index 0; surplus
// input elements are decoded and dropped, and unused array elements are
// zeroed. Any other destination raises an UnmarshalTypeError.
func (d *Decoder) array(v reflect.Value, endType tokenType) {
// Check for unmarshaler.
u, pv := d.indirect(v, false)
if u != nil {
// Re-create the opening token so nextValueBytes sees the full collection.
switch endType {
case tokenVectorEnd:
d.doUndo([]byte{'['}, tokenVectorStart)
case tokenListEnd:
d.doUndo([]byte{'('}, tokenListStart)
case tokenSetEnd:
d.doUndo([]byte{'#', '{'}, tokenSetStart)
}
bs, err := d.nextValueBytes()
if err == nil {
err = u.UnmarshalEDN(bs)
}
if err != nil {
d.error(err)
}
return
}
v = pv
// Check type of target.
switch v.Kind() {
case reflect.Interface:
if v.NumMethod() == 0 {
// Decoding into nil interface? Switch to non-reflect code.
v.Set(reflect.ValueOf(d.arrayInterface(endType)))
return
}
// Otherwise it's invalid.
fallthrough
default:
d.error(&UnmarshalTypeError{"array", v.Type()})
return
case reflect.Array:
case reflect.Slice:
break
}
i := 0
for {
// Look ahead for the end token; anything else is pushed back and decoded
// as the next element.
bs, ttype, err := d.nextToken()
if err != nil {
d.error(err)
}
if ttype == endType {
break
}
d.doUndo(bs, ttype)
// Get element of array, growing if necessary.
if v.Kind() == reflect.Slice {
// Grow slice if necessary
if i >= v.Cap() {
// Grow by half the current capacity, with a minimum capacity of 4.
newcap := v.Cap() + v.Cap()/2
if newcap < 4 {
newcap = 4
}
newv := reflect.MakeSlice(v.Type(), v.Len(), newcap)
reflect.Copy(newv, v)
v.Set(newv)
}
if i >= v.Len() {
v.SetLen(i + 1)
}
}
if i < v.Len() {
// Decode into element.
d.value(v.Index(i))
} else {
// Ran out of fixed array: skip.
d.value(reflect.Value{})
}
i++
}
if i < v.Len() {
if v.Kind() == reflect.Array {
// Array. Zero the rest.
z := reflect.Zero(v.Type().Elem())
for ; i < v.Len(); i++ {
v.Index(i).Set(z)
}
} else {
// Slice held more elements than were read: truncate.
v.SetLen(i)
}
}
// An empty collection always yields an empty, non-nil slice.
if i == 0 && v.Kind() == reflect.Slice {
v.Set(reflect.MakeSlice(v.Type(), 0, 0))
}
}
// arrayInterface decodes the elements of a list, vector or set up to endType
// into a []interface{}. The opening token has already been consumed. The
// result is never nil, even when the collection is empty.
func (d *Decoder) arrayInterface(endType tokenType) interface{} {
	elems := []interface{}{}
	for {
		bs, tt, err := d.nextToken()
		if err != nil {
			d.error(err)
		}
		if tt == endType {
			return elems
		}
		// Not the end token: hand it back and decode a full value.
		d.doUndo(bs, tt)
		elems = append(elems, d.valueInterface())
	}
}
// value decodes the next EDN value into v. When v is the zero reflect.Value
// the value is still read, but the result is thrown away. Tokens that cannot
// start a value raise errUnexpected.
func (d *Decoder) value(v reflect.Value) {
	if !v.IsValid() {
		// No destination: consume the value and drop it.
		d.valueInterface()
		return
	}

	bs, ttype, err := d.nextToken()
	if err != nil {
		d.error(err)
		return
	}

	switch ttype {
	case tokenListStart:
		d.array(v, tokenListEnd)
	case tokenVectorStart:
		d.array(v, tokenVectorEnd)
	case tokenSetStart:
		d.set(v)
	case tokenMapStart:
		d.ednmap(v)
	case tokenTag:
		d.tag(bs, v)
	case tokenSymbol, tokenKeyword, tokenString, tokenInt, tokenFloat, tokenChar:
		d.literal(bs, ttype, v)
	default:
		d.error(errUnexpected)
	}
}
// tag decodes a tagged element into v. tag is the tag token that has already
// been read; its first byte is stripped to obtain the tag name.
//
// An Unmarshaler destination receives the tag, a space and the raw bytes of
// the tagged value. An empty interface receives whatever tagInterface returns.
// Otherwise the tag must be registered: a registered function is called with
// the decoded value, a registered non-function value is used as the shape to
// decode into, and the result (or the value a non-nil pointer result points
// to) is assigned to v. An unregistered tag raises UnknownTagError.
func (d *Decoder) tag(tag []byte, v reflect.Value) {
	unmarshaler, target := d.indirect(v, false)
	if unmarshaler != nil {
		raw, err := d.nextValueBytes()
		if err != nil {
			d.error(err)
		}
		if err = unmarshaler.UnmarshalEDN(append(append(tag, ' '), raw...)); err != nil {
			d.error(err)
		}
		return
	}

	if target.Kind() == reflect.Interface && target.NumMethod() == 0 {
		target.Set(reflect.ValueOf(d.tagInterface(tag)))
		return
	}

	fn := d.getTagFn(string(tag[1:]))
	if fn == nil {
		// So in theory we'd have to match against any interface that could be
		// assignable to the Tag type, to ensure we would decode whenever possible.
		// That is any interface that specifies any combination of the methods
		// MarshalEDN, UnmarshalEDN and String. I'm not sure if that makes sense
		// though, so I've punted this for now.
		raw, err := d.nextValueBytes()
		if err != nil {
			d.error(err)
		}
		d.error(UnknownTagError{tag, raw, target.Type()})
		return
	}

	var result reflect.Value
	if fnType := fn.Type(); fnType.Kind() == reflect.Func {
		// Decode into the function's input type and call the function.
		arg := reflect.New(fnType.In(0))
		d.value(arg)
		out := fn.Call([]reflect.Value{arg.Elem()})
		if err, ok := out[1].Interface().(error); ok && err != nil {
			d.error(err)
		}
		result = out[0]
	} else {
		// Not a function: just match on struct shape.
		result = reflect.New(fnType).Elem()
		d.value(result)
	}

	// result is not necessarily direct, so we have to make it direct, but
	// *only* if it's NOT null at every step. Which leads to the question: How
	// do we unify these values? This is particularly hairy if these are double
	// pointers or bigger.
	//
	// Currently we only attempt to solve this for results by checking if the
	// result can be dereferenced into a value. The value will always be a
	// non-pointer, so presumably we can assign it in this fashion as a
	// temporary resolution.
	switch {
	case result.Type().AssignableTo(target.Type()):
		target.Set(result)
	case result.Kind() == reflect.Ptr && !result.IsNil() &&
		result.Elem().Type().AssignableTo(target.Type()):
		// A non-nil pointer to a value we can assign: use the pointee.
		target.Set(result.Elem())
	default:
		d.error(fmt.Errorf("Cannot assign %s to %s (tag issue?)", result.Type(), target.Type()))
	}
}
// tagInterface decodes a tagged element for an untyped destination. tag is
// the tag token that has already been read; its first byte is stripped to
// obtain the tag name.
//
// An unregistered tag yields a Tag holding the name and the generically
// decoded value. A registered non-function value is used as the shape to
// decode into. A registered function is called with the value decoded into
// its input type, and its first result is returned.
func (d *Decoder) tagInterface(tag []byte) interface{} {
	name := string(tag[1:])
	fn := d.getTagFn(name)
	if fn == nil {
		return Tag{Tagname: name, Value: d.valueInterface()}
	}

	fnType := fn.Type()
	if fnType.Kind() != reflect.Func {
		holder := reflect.New(fnType).Elem()
		d.value(holder)
		return holder.Interface()
	}

	arg := reflect.New(fnType.In(0))
	d.value(arg)
	out := fn.Call([]reflect.Value{arg.Elem()})
	if err, ok := out[1].Interface().(error); ok && err != nil {
		d.error(err)
	}
	return out[0].Interface()
}
// valueInterface decodes the next EDN value into its generic Go
// representation (see the table in the Unmarshal documentation). A token that
// cannot start a value raises errUnexpected.
func (d *Decoder) valueInterface() interface{} {
	bs, ttype, err := d.nextToken()
	if err != nil {
		d.error(err)
		return nil // won't get here
	}

	switch ttype {
	case tokenListStart:
		return d.arrayInterface(tokenListEnd)
	case tokenVectorStart:
		return d.arrayInterface(tokenVectorEnd)
	case tokenSetStart:
		return d.setInterface()
	case tokenMapStart:
		return d.ednmapInterface()
	case tokenTag:
		return d.tagInterface(bs)
	case tokenSymbol, tokenKeyword, tokenString, tokenInt, tokenFloat, tokenChar:
		return d.literalInterface(bs, ttype)
	}

	d.error(errUnexpected)
	return nil
}
// ednmap decodes an EDN map into v. The opening '{' has already been consumed.
//
// An Unmarshaler destination receives the raw bytes of the whole map. An
// empty interface receives the result of ednmapInterface. A Go map is
// allocated if nil and then filled. A struct has its fields set from keys
// that are symbols, keywords or strings, preferring an exact name match over
// an equalFold match. Keys of any other kind, as well as the symbols true,
// false and nil, are skipped together with their values. A key without a
// matching field is skipped too, unless disallowUnknownFields is set, in
// which case an UnknownFieldError is raised. Any other destination raises an
// UnmarshalTypeError.
func (d *Decoder) ednmap(v reflect.Value) {
	// Check for unmarshaler.
	u, pv := d.indirect(v, false)
	if u != nil {
		// Replay the opening brace so the unmarshaler gets the whole map.
		d.doUndo([]byte{'{'}, tokenMapStart)
		bs, err := d.nextValueBytes()
		if err == nil {
			err = u.UnmarshalEDN(bs)
		}
		if err != nil {
			d.error(err)
		}
		return
	}
	v = pv

	if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
		v.Set(reflect.ValueOf(d.ednmapInterface()))
		return
	}

	var keyType reflect.Type

	// Check type of target: Struct or map[T]U
	switch v.Kind() {
	case reflect.Map:
		t := v.Type()
		keyType = t.Key()
		if v.IsNil() {
			v.Set(reflect.MakeMap(t))
		}
	case reflect.Struct:
	default:
		d.error(&UnmarshalTypeError{"map", v.Type()})
	}

	// separate these to ease reading (theoretically fewer checks too)
	if v.Kind() == reflect.Struct {
		// The field table does not depend on the input, so fetch it once.
		fields := cachedTypeFields(v.Type())
		for {
			bs, tt, err := d.nextToken()
			if err != nil {
				d.error(err)
			}
			if tt == tokenMapEnd {
				break
			}
			skip := false
			var key []byte
			// The key can either be a symbol, a keyword or a string. We will skip
			// anything that is not any of these values.
			switch tt {
			case tokenSymbol:
				if bytes.Equal(bs, falseByte) || bytes.Equal(bs, trueByte) || bytes.Equal(bs, nilByte) {
					skip = true
				}
				key = bs
			case tokenKeyword:
				key = bs[1:]
			case tokenString:
				k, ok := unquoteBytes(bs)
				key = k
				if !ok {
					d.error(errInternal)
				}
			default:
				// Any other key cannot name a field. It may be a compound or
				// tagged value, of which only the first token has been read so
				// far, so push the token back and skip the key as a whole.
				// Otherwise the remainder of the key would be mistaken for the
				// value and the following entries.
				d.doUndo(bs, tt)
				if err := d.traverseValue(); err != nil {
					d.error(err)
				}
				skip = true
			}

			if skip { // will panic if something bad happens, so this is fine
				d.valueInterface()
				continue
			}

			var subv reflect.Value
			var f *field
			for i := range fields {
				ff := &fields[i]
				if bytes.Equal(ff.nameBytes, key) {
					f = ff
					break
				}
				// Remember the first folded match, but keep looking for an
				// exact one.
				if f == nil && ff.equalFold(ff.nameBytes, key) {
					f = ff
				}
			}
			if f != nil {
				// Walk the index path to the (possibly embedded) field,
				// allocating nil embedded pointers on the way.
				subv = v
				for _, i := range f.index {
					if subv.Kind() == reflect.Ptr {
						if subv.IsNil() {
							subv.Set(reflect.New(subv.Type().Elem()))
						}
						subv = subv.Elem()
					}
					subv = subv.Field(i)
				}
			} else if d.disallowUnknownFields {
				d.error(&UnknownFieldError{string(key), v.Type()})
			}

			// If subv not set, value() will just skip.
			d.value(subv)
		}
		// if not struct, then it is a map
	} else if keyType.Kind() == reflect.Interface && keyType.NumMethod() == 0 {
		// special case for unhashable key types
		elemType := v.Type().Elem()
		var mapElem reflect.Value
		for {
			bs, tt, err := d.nextToken()
			if err != nil {
				d.error(err)
			}
			if tt == tokenMapEnd {
				break
			}
			d.doUndo(bs, tt)

			key := d.valueInterface()
			// Reuse one scratch element across entries, zeroing it each time.
			if !mapElem.IsValid() {
				mapElem = reflect.New(elemType).Elem()
			} else {
				mapElem.Set(reflect.Zero(elemType))
			}
			subv := mapElem
			d.value(subv)

			if key == nil {
				v.SetMapIndex(reflect.New(keyType).Elem(), subv)
			} else {
				switch reflect.TypeOf(key).Kind() {
				case reflect.Slice, reflect.Map: // bypass issues with unhashable types
					v.SetMapIndex(reflect.ValueOf(&key), subv)
				default:
					v.SetMapIndex(reflect.ValueOf(key), subv)
				}
			}
		}
	} else { // default map case
		elemType := v.Type().Elem()
		var mapElem reflect.Value
		for {
			bs, tt, err := d.nextToken()
			if err != nil {
				d.error(err)
			}
			if tt == tokenMapEnd {
				break
			}
			d.doUndo(bs, tt)

			// should we do the same as with mapElem?
			key := reflect.New(keyType).Elem()
			d.value(key)

			if !mapElem.IsValid() {
				mapElem = reflect.New(elemType).Elem()
			} else {
				mapElem.Set(reflect.Zero(elemType))
			}
			subv := mapElem
			d.value(subv)
			v.SetMapIndex(key, subv)
		}
	}
}
// ednmapInterface decodes an EDN map, whose opening '{' has already been
// consumed, into a map[interface{}]interface{}. Keys that decode to a Go
// slice or map cannot be hashed, so a pointer to the key is stored instead.
// nil is hashable and is used as a key directly.
func (d *Decoder) ednmapInterface() interface{} {
	result := map[interface{}]interface{}{}
	for {
		bs, tt, err := d.nextToken()
		if err != nil {
			d.error(err)
		}
		if tt == tokenMapEnd {
			return result
		}
		d.doUndo(bs, tt)

		key := d.valueInterface()
		value := d.valueInterface()
		if key != nil {
			if kind := reflect.TypeOf(key).Kind(); kind == reflect.Slice || kind == reflect.Map {
				// bypass issues with unhashable types
				result[&key] = value
				continue
			}
		}
		result[key] = value
	}
}
// set decodes an EDN set into v. The opening "#{" has already been consumed.
//
// An Unmarshaler destination receives the raw bytes of the whole set. Slices
// and arrays are filled through array. An empty interface receives the result
// of setInterface. A Go map is accepted when its element type is bool (each
// member maps to true) or a struct without fields (each member maps to the
// zero value); it is allocated if nil. Every other destination raises an
// UnmarshalTypeError.
//
// TODO: accept option structs? -- i.e. structs where all fields are bools
func (d *Decoder) set(v reflect.Value) {
	unmarshaler, target := d.indirect(v, false)
	if unmarshaler != nil {
		// Replay the opening token so the unmarshaler gets the whole set.
		d.doUndo([]byte{'#', '{'}, tokenSetStart)
		raw, err := d.nextValueBytes()
		if err == nil {
			err = unmarshaler.UnmarshalEDN(raw)
		}
		if err != nil {
			d.error(err)
		}
		return
	}
	v = target

	var (
		member  reflect.Value // value stored for every member of the set
		keyType reflect.Type
	)

	switch v.Kind() {
	case reflect.Slice, reflect.Array:
		// Some extent of rechecking going on when we pass it to array, but it
		// should be a constant factor only.
		d.array(v, tokenSetEnd)
		return
	case reflect.Interface:
		if v.NumMethod() != 0 {
			d.error(&UnmarshalTypeError{"set", v.Type()})
		}
		v.Set(reflect.ValueOf(d.setInterface()))
		return
	case reflect.Map:
		// map must have bool or struct{} value type
		mapType := v.Type()
		keyType = mapType.Key()
		elemType := mapType.Elem()
		switch elemType.Kind() {
		case reflect.Bool:
			member = reflect.ValueOf(true)
		case reflect.Struct:
			// only structs with 0 fields are accepted
			if elemType.NumField() != 0 {
				d.error(&UnmarshalTypeError{"set", v.Type()})
			}
			member = reflect.Zero(elemType)
		default:
			d.error(&UnmarshalTypeError{"set", v.Type()})
		}
		if v.IsNil() {
			v.Set(reflect.MakeMap(mapType))
		}
	default:
		d.error(&UnmarshalTypeError{"set", v.Type()})
	}

	// Keys of type interface{} are decoded generically, which needs special
	// care to avoid panics when slices and maps show up as keys.
	genericKeys := keyType.Kind() == reflect.Interface && keyType.NumMethod() == 0

	for {
		bs, tt, err := d.nextToken()
		if err != nil {
			d.error(err)
		}
		if tt == tokenSetEnd {
			return
		}
		d.doUndo(bs, tt)

		if !genericKeys {
			key := reflect.New(keyType).Elem()
			d.value(key)
			v.SetMapIndex(key, member)
			continue
		}

		key := d.valueInterface()
		if key == nil {
			// Need a zero value of the specific keyType. As this is an
			// interface, this will itself be nil.
			v.SetMapIndex(reflect.New(keyType).Elem(), member)
			continue
		}
		switch reflect.TypeOf(key).Kind() {
		case reflect.Slice, reflect.Map: // bypass issues with unhashable types
			v.SetMapIndex(reflect.ValueOf(&key), member)
		default:
			v.SetMapIndex(reflect.ValueOf(key), member)
		}
	}
}
// setInterface decodes an EDN set, whose opening "#{" has already been
// consumed, into a map[interface{}]bool in which every member maps to true.
// Members that decode to a Go slice or map cannot be hashed, so a pointer to
// the member is stored instead.
func (d *Decoder) setInterface() interface{} {
	members := map[interface{}]bool{}
	for {
		bs, tt, err := d.nextToken()
		if err != nil {
			d.error(err)
		}
		if tt == tokenSetEnd {
			return members
		}
		d.doUndo(bs, tt)

		key := d.valueInterface()
		if key != nil {
			if kind := reflect.TypeOf(key).Kind(); kind == reflect.Slice || kind == reflect.Map {
				// bypass issues with unhashable types
				members[&key] = true
				continue
			}
		}
		members[key] = true
	}
}
// Byte forms of the symbols that are not decoded as ordinary symbols.
var nilByte = []byte(`nil`)
var trueByte = []byte(`true`)
var falseByte = []byte(`false`)
// Reflect types that literal compares destination types against.
var symbolType = reflect.TypeOf(Symbol(""))
var keywordType = reflect.TypeOf(Keyword(""))
var byteSliceType = reflect.TypeOf([]byte(nil))
// The big number types are the struct types themselves, not pointers to them.
var bigFloatType = reflect.TypeOf((*big.Float)(nil)).Elem()
var bigIntType = reflect.TypeOf((*big.Int)(nil)).Elem()
// literal decodes a single literal token into v. bs holds the raw bytes of
// the token and ttype its type: symbol (including nil, true and false),
// keyword, int, float, char or string.
//
// An Unmarshaler destination receives bs unchanged. Otherwise the literal is
// converted according to the kind of v, and an UnmarshalTypeError is raised
// when the literal does not fit: wrong kind, numeric overflow, or a value
// that cannot be assigned to a non-empty interface. Ints ending in 'N' and
// floats ending in 'M' are stored as *big.Int and *big.Float when the
// destination is an interface.
func (d *Decoder) literal(bs []byte, ttype tokenType, v reflect.Value) {
	wantptr := ttype == tokenSymbol && bytes.Equal(nilByte, bs)
	u, pv := d.indirect(v, wantptr)
	if u != nil {
		err := u.UnmarshalEDN(bs)
		if err != nil {
			d.error(err)
		}
		return
	}
	v = pv
	switch ttype {
	case tokenSymbol:
		if wantptr { // nil
			switch v.Kind() {
			case reflect.Interface, reflect.Ptr, reflect.Map, reflect.Slice:
				v.Set(reflect.Zero(v.Type()))
			default:
				d.error(&UnmarshalTypeError{"nil", v.Type()})
			}
		} else if bytes.Equal(trueByte, bs) || bytes.Equal(falseByte, bs) { // true|false
			value := bs[0] == 't'
			switch v.Kind() {
			default:
				d.error(&UnmarshalTypeError{"bool", v.Type()})
			case reflect.Bool:
				v.SetBool(value)
			case reflect.Interface:
				if v.NumMethod() == 0 {
					v.Set(reflect.ValueOf(value))
				} else {
					d.error(&UnmarshalTypeError{"bool", v.Type()})
				}
			}
		} else if v.Kind() == reflect.String && v.Type() == symbolType { // "actual" symbols
			v.SetString(string(bs))
		} else if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
			v.Set(reflect.ValueOf(Symbol(string(bs))))
		} else {
			d.error(&UnmarshalTypeError{"symbol", v.Type()})
		}
	case tokenKeyword:
		// The first byte of the token is dropped from the stored keyword.
		if v.Kind() == reflect.String && v.Type() == keywordType { // "actual" keywords
			v.SetString(string(bs[1:]))
		} else if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
			v.Set(reflect.ValueOf(Keyword(string(bs[1:]))))
		} else {
			d.error(&UnmarshalTypeError{"keyword", v.Type()})
		}
	case tokenInt:
		var s string
		isBig := false
		if bs[len(bs)-1] == 'N' { // can end with N, which we promptly ignore
			// TODO: If the user expects a float and receives what is perceived as an
			// int (ends with N), what is the sensible thing to do?
			s = string(bs[:len(bs)-1])
			isBig = true
		} else {
			s = string(bs)
		}
		switch v.Kind() {
		default:
			switch v.Type() {
			case bigIntType:
				bi := v.Addr().Interface().(*big.Int)
				_, ok := bi.SetString(s, 10)
				if !ok {
					d.error(errInternal)
				}
			case bigFloatType:
				mc := d.mathContext()
				bf := v.Addr().Interface().(*big.Float)
				bf = bf.SetPrec(mc.Precision).SetMode(mc.Mode)
				_, _, err := bf.Parse(s, 10)
				if err != nil { // grumble grumble
					d.error(errInternal)
				}
			default:
				d.error(&UnmarshalTypeError{"int", v.Type()})
			}
		case reflect.Interface:
			if !isBig {
				n, err := strconv.ParseInt(s, 10, 64)
				if err != nil {
					d.error(&UnmarshalTypeError{"int " + s, reflect.TypeOf(int64(0))})
				}
				if v.NumMethod() != 0 {
					d.error(&UnmarshalTypeError{"int", v.Type()})
				}
				v.Set(reflect.ValueOf(n))
			} else {
				bi := new(big.Int)
				_, ok := bi.SetString(s, 10)
				if !ok {
					d.error(errInternal)
				}
				biv := reflect.ValueOf(bi)
				// Without this check reflect would panic with a non-error
				// value for an interface that *big.Int does not implement.
				if !biv.Type().AssignableTo(v.Type()) {
					d.error(&UnmarshalTypeError{"int", v.Type()})
				}
				v.Set(biv)
			}
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			n, err := strconv.ParseInt(s, 10, 64)
			if err != nil || v.OverflowInt(n) {
				d.error(&UnmarshalTypeError{"int " + s, v.Type()})
			}
			v.SetInt(n)
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
			n, err := strconv.ParseUint(s, 10, 64)
			if err != nil || v.OverflowUint(n) {
				d.error(&UnmarshalTypeError{"int " + s, v.Type()})
			}
			v.SetUint(n)
		case reflect.Float32, reflect.Float64:
			n, err := strconv.ParseFloat(s, v.Type().Bits())
			if err != nil || v.OverflowFloat(n) {
				d.error(&UnmarshalTypeError{"int " + s, v.Type()})
			}
			v.SetFloat(n)
		}
	case tokenFloat:
		var s string
		isBig := false
		if bs[len(bs)-1] == 'M' { // can end with M, which we promptly ignore
			s = string(bs[:len(bs)-1])
			isBig = true
		} else {
			s = string(bs)
		}
		switch v.Kind() {
		default:
			switch v.Type() {
			case bigFloatType:
				mc := d.mathContext()
				bf := v.Addr().Interface().(*big.Float)
				bf = bf.SetPrec(mc.Precision).SetMode(mc.Mode)
				_, _, err := bf.Parse(s, 10)
				if err != nil { // grumble grumble
					d.error(errInternal)
				}
			default:
				d.error(&UnmarshalTypeError{"float", v.Type()})
			}
		case reflect.Interface:
			if !isBig {
				n, err := strconv.ParseFloat(s, 64)
				if err != nil {
					d.error(&UnmarshalTypeError{"float " + s, reflect.TypeOf(float64(0))})
				}
				if v.NumMethod() != 0 {
					d.error(&UnmarshalTypeError{"float", v.Type()})
				}
				v.Set(reflect.ValueOf(n))
			} else {
				mc := d.mathContext()
				bf := new(big.Float).SetPrec(mc.Precision).SetMode(mc.Mode)
				_, _, err := bf.Parse(s, 10)
				if err != nil { // grumble grumble
					d.error(errInternal)
				}
				bfv := reflect.ValueOf(bf)
				// Same guard as for big ints: raise a proper type error
				// instead of letting reflect panic.
				if !bfv.Type().AssignableTo(v.Type()) {
					d.error(&UnmarshalTypeError{"float", v.Type()})
				}
				v.Set(bfv)
			}
		case reflect.Float32, reflect.Float64:
			n, err := strconv.ParseFloat(s, v.Type().Bits())
			if err != nil || v.OverflowFloat(n) {
				d.error(&UnmarshalTypeError{"float " + s, v.Type()})
			}
			v.SetFloat(n)
		}
	case tokenChar:
		r, err := toRune(bs)
		if err != nil {
			d.error(err)
		}
		switch v.Kind() {
		default:
			d.error(&UnmarshalTypeError{"rune", v.Type()})
		case reflect.Interface:
			if v.NumMethod() != 0 {
				d.error(&UnmarshalTypeError{"rune", v.Type()})
			}
			v.Set(reflect.ValueOf(r))
		case reflect.Int32: // rune is an alias for int32
			v.SetInt(int64(r))
		}
	case tokenString:
		s, ok := unquoteBytes(bs)
		if !ok {
			d.error(errInternal)
		}
		switch v.Kind() {
		default:
			d.error(&UnmarshalTypeError{"string", v.Type()})
		case reflect.String:
			v.SetString(string(s))
		case reflect.Interface:
			if v.NumMethod() == 0 {
				v.Set(reflect.ValueOf(string(s)))
			} else {
				d.error(&UnmarshalTypeError{"string", v.Type()})
			}
		}
	default:
		d.error(errInternal)
	}
}
// literalInterface converts a single literal token into its generic Go
// representation: nil, bool or Symbol for symbols, Keyword for keywords,
// int64 for ints (big.Int for ints ending in 'N'), float64 for floats (a
// trailing 'M' is ignored), rune for characters, and the unquoted text for
// strings.
//
// NOTE(review): literal stores *big.Int and *big.Float into interface
// destinations, whereas this function returns a big.Int value and a plain
// float64. Confirm whether that difference is intended.
func (d *Decoder) literalInterface(bs []byte, ttype tokenType) interface{} {
	switch ttype {
	case tokenSymbol:
		switch {
		case bytes.Equal(nilByte, bs):
			return nil
		case bytes.Equal(trueByte, bs):
			return true
		case bytes.Equal(falseByte, bs):
			return false
		}
		return Symbol(string(bs))
	case tokenKeyword:
		return Keyword(string(bs[1:]))
	case tokenInt:
		last := len(bs) - 1
		if bs[last] != 'N' {
			n, err := strconv.ParseInt(string(bs), 10, 64)
			if err != nil {
				d.error(err)
			}
			return n
		}
		// can end with N
		var bi big.Int
		if _, ok := bi.SetString(string(bs[:last]), 10); !ok {
			d.error(errInternal)
		}
		return bi
	case tokenFloat:
		digits := bs
		if last := len(bs) - 1; bs[last] == 'M' { // can end with M, which we promptly ignore
			digits = bs[:last]
		}
		n, err := strconv.ParseFloat(string(digits), 64)
		if err != nil {
			d.error(err)
		}
		return n
	case tokenChar:
		r, err := toRune(bs)
		if err != nil {
			d.error(err)
		}
		return r
	case tokenString:
		text, ok := unquote(bs)
		if !ok {
			d.error(errInternal)
		}
		return text
	default:
		d.error(errInternal)
		return nil
	}
}
// Named character literals recognized by toRune, in their token form
// including the leading backslash.
var (
newlineBytes = []byte(`\newline`)
returnBytes = []byte(`\return`)
spaceBytes = []byte(`\space`)
tabBytes = []byte(`\tab`)
formfeedBytes = []byte(`\formfeed`)
)
func toRune(bs []byte) (rune, error) {
// handle special cases first:
switch {
case bytes.Equal(bs, newlineBytes):
return '\n', nil
case bytes.Equal(bs, returnBytes):
return '\r', nil
case bytes.Equal(bs, spaceBytes):
return ' ', nil
case bytes.Equal(bs, tabBytes):
return '\t', nil
case bytes.Equal(bs, formfeedBytes):
return '\f', nil
case len(bs) == 6 && bs[1] == 'u': // I don't think unicode chars could be 5 bytes long?
return getu4(bs), nil
default:
r, size := utf8.DecodeRune(bs[1:])
if r == utf8.RuneError && size == 1 {
return r, errIllegalRune
}
return r, nil
}
}
// nextToken returns the next token that is not discarded. Whenever rawToken
// yields a discard token, the value that follows it is skipped with
// traverseValue and reading continues with the token after that.
func (d *Decoder) nextToken() ([]byte, tokenType, error) {
	for {
		bs, tt, err := d.rawToken()
		if err != nil || tt != tokenDiscard {
			return bs, tt, err
		}
		// #_ was read: drop the value it applies to and try again.
		if err := d.traverseValue(); err != nil {
			return nil, tokenError, err
		}
	}
}
// rawToken returns the next token from the input, including discard tokens.
//
// A token pushed back with doUndo is returned first. Otherwise the lexer is
// reset and fed runes one at a time: first a leftover rune from the previous
// call, if any, then runes read from d.rd. Runes the lexer ignores before a
// token starts are skipped. The bytes of the token are accumulated until the
// lexer reports its end. If the rune that ends a token belongs to the next
// one (lexEndPrev), it is kept as the leftover for the next call.
//
// errNoneLeft is returned when the input ends before a token starts. When the
// input ends in the middle of a token, the lexer's eof method decides how the
// token ends.
func (d *Decoder) rawToken() ([]byte, tokenType, error) {
if d.undo {
// Hand out the pushed-back token and clear the undo slot.
d.undo = false
b := d.prevSlice
tt := d.prevTtype
d.prevSlice = nil
d.prevTtype = tokenError
return b, tt, nil
}
var val bytes.Buffer
d.lex.reset()
// doIgnore stays true until the first rune of a token has been seen.
doIgnore := true
if d.hasLeftover {
d.hasLeftover = false
d.lex.position++
switch d.lex.state(d.leftover) {
case lexCont:
val.WriteRune(d.leftover)
doIgnore = false
case lexEnd:
val.WriteRune(d.leftover)
return val.Bytes(), d.lex.token, nil
case lexEndPrev:
// A token cannot end before its first rune.
return nil, tokenError, errInternal
case lexError:
return nil, tokenError, d.lex.err
case lexIgnore:
// just ignore
}
}
if doIgnore { // ignore whitespace
readWhitespace:
for {
r, _, err := d.rd.ReadRune()
if err == io.EOF {
return nil, tokenError, errNoneLeft
}
if err != nil {
return nil, tokenError, err
}
d.lex.position++
switch d.lex.state(r) {
case lexCont: // got a value, so continue on past doIgnoring
// TODO: This returns an error. Will it happen in practice? Probably?
val.WriteRune(r)
break readWhitespace
case lexError:
return nil, tokenError, d.lex.err
case lexEnd:
val.WriteRune(r)
return val.Bytes(), d.lex.token, nil
case lexEndPrev:
return nil, tokenError, errInternal
case lexIgnore:
// keep on reading
}
}
}
// A token has started: keep reading until the lexer says it is complete.
for {
r, _, err := d.rd.ReadRune()
var ls lexState
// this is not exactly perfect.
switch {
case err == io.EOF:
ls = d.lex.eof()
case err != nil:
return nil, tokenError, err
default:
d.lex.position++
ls = d.lex.state(r)
}
switch ls {
case lexCont:
val.WriteRune(r)
case lexIgnore:
if err != io.EOF {
return nil, tokenError, errInternal
} else {
return nil, tokenError, errNoneLeft
}
case lexEnd:
// At EOF there is no rune to append.
if err != io.EOF {
val.WriteRune(r)
}
return val.Bytes(), d.lex.token, nil
case lexEndPrev:
// r belongs to the next token; keep it for the next call.
d.hasLeftover = true
d.leftover = r
return val.Bytes(), d.lex.token, nil
case lexError:
return nil, tokenError, d.lex.err
}
}
}
// traverseValue reads exactly one value and throws it away, whether it is a
// literal, a tagged element or a nested collection. The value is only checked
// for balanced delimiters, not validated otherwise. Discard tokens inside the
// value are handled by nextToken.
func (d *Decoder) traverseValue() error {
	stack := newTokenStack()
	for {
		_, tt, err := d.nextToken()
		if err != nil {
			return err
		}
		if err := stack.push(tt); err != nil {
			return err
		}
		if stack.done() {
			return nil
		}
	}
}
// tokenStackElem is one open construct on a tokenStack.
type tokenStackElem struct {
// tt is the token that opened the construct (a start token, tag or discard).
tt tokenType
// count is the number of values seen directly inside the construct so far.
count int
}
// tokenStack tracks the nesting of open constructs while a value is skipped.
type tokenStack struct {
// toks holds the currently open constructs, innermost last.
toks []tokenStackElem
// toplevel is the most recent token pushed while toks was empty.
toplevel tokenType
}
// newTokenStack returns an empty tokenStack whose toplevel token is
// tokenError, i.e. no token has been pushed yet.
func newTokenStack() *tokenStack {
	stack := new(tokenStack)
	stack.toplevel = tokenError
	return stack
}
// done reports whether a complete value has been pushed: nothing is open any
// more and the last toplevel token was not a discard.
func (t *tokenStack) done() bool {
return len(t.toks) == 0 && t.toplevel != tokenDiscard
}
// peek returns the token type of the innermost open construct. It panics if
// the stack is empty, so callers must check len(t.toks) first.
func (t *tokenStack) peek() tokenType {
return t.toks[len(t.toks)-1].tt
}
// peekCount returns the value count of the innermost open construct. It
// panics if the stack is empty.
func (t *tokenStack) peekCount() int {
return t.toks[len(t.toks)-1].count
}
// pop removes the innermost open entry. The stack must not be empty.
func (t *tokenStack) pop() {
	last := len(t.toks) - 1
	t.toks = t.toks[:last]
}
// push feeds the next token type into the stack.
//
// Collection starts, tags and discard markers open a new entry. A collection
// end must match the innermost open entry (a map end closes either a map or a
// set) and removes it; otherwise errUnexpected is returned. Any other token
// type is treated as a finished value.
//
// Once a value has finished (a plain token or a closed collection), it is
// counted on the innermost open entry. Every tag sitting on top is then
// removed, each one counting as a finished value for the entry beneath it, and
// finally at most one discard marker on top is removed.
func (t *tokenStack) push(tt tokenType) error {
	// Remember what the stack was started with; done relies on this.
	if len(t.toks) == 0 {
		t.toplevel = tt
	}

	switch tt {
	case tokenMapStart, tokenVectorStart, tokenListStart, tokenSetStart, tokenDiscard, tokenTag:
		// Openers are always accepted and never complete a value themselves.
		t.toks = append(t.toks, tokenStackElem{tt: tt})
		return nil
	case tokenMapEnd, tokenListEnd, tokenVectorEnd:
		if len(t.toks) == 0 {
			return errUnexpected
		}
		opener := t.peek()
		var matches bool
		switch tt {
		case tokenMapEnd:
			matches = opener == tokenMapStart || opener == tokenSetStart
		case tokenListEnd:
			matches = opener == tokenListStart
		default: // tokenVectorEnd
			matches = opener == tokenVectorStart
		}
		if !matches {
			return errUnexpected
		}
		t.pop()
	}

	// countValue tallies one finished value on the innermost open entry.
	countValue := func() {
		if n := len(t.toks); n > 0 {
			t.toks[n-1].count++
		}
	}

	countValue()
	// A tag is complete as soon as the value following it is complete.
	for len(t.toks) > 0 && t.peek() == tokenTag {
		t.pop()
		countValue()
	}
	// A discard marker swallows exactly one value.
	if len(t.toks) > 0 && t.peek() == tokenDiscard {
		t.pop()
	}
	return nil
}
// more skips over anything the lexer ignores and over discarded values ("#_"
// followed by a value), and returns nil once the decoder is positioned at
// something that is not ignorable. The first such rune is stored as the
// decoder's leftover rather than being consumed.
//
// When the reader runs dry while skipping, the reader's error (io.EOF at end
// of input) is returned as-is. If the input ends directly after a '#',
// errNoneLeft is returned instead. Errors from the lexer and from skipping a
// discarded value are passed up.
func (d *Decoder) more() error {
	// A token that was handed back counts as more data.
	if d.undo {
		return nil
	}

	// dropDiscarded runs once "#_" has been consumed: it clears the leftover,
	// skips the value that follows, and then starts over.
	dropDiscarded := func() error {
		d.hasLeftover = false
		d.leftover = '\uFFFD'
		d.lex.position += 2
		if err := d.traverseValue(); err != nil {
			return err
		}
		return d.more()
	}

	if d.hasLeftover {
		switch {
		case d.leftover == '#':
			// '#' may start a discard; the next rune decides.
			next, _, err := d.rd.ReadRune()
			switch {
			case err == io.EOF:
				return errNoneLeft
			case err != nil:
				return err
			case next != '_':
				// Not a discard: put the rune back and keep '#' as leftover.
				return d.rd.UnreadRune()
			}
			return dropDiscarded()
		case !isWhitespace(d.leftover) && d.leftover != ';':
			// The leftover already is the start of something real.
			return nil
		}
	}

	// Either there is no leftover, or the leftover is whitespace or the start
	// of a comment. Let the lexer eat ignorable input until something else
	// shows up.
	d.lex.reset()
	var first rune
scan:
	for {
		r, _, err := d.rd.ReadRune()
		if err != nil {
			// Includes io.EOF: nothing more to read.
			return err
		}
		d.lex.position++
		switch d.lex.state(r) {
		case lexCont:
			// Looks like the start of a value.
			first = r
			break scan
		case lexError:
			return d.lex.err
		case lexEnd:
			// A delimiter: keep it as leftover.
			d.hasLeftover = true
			d.leftover = r
			d.lex.position--
			return nil
		case lexEndPrev:
			return errInternal
		case lexIgnore:
			// Still ignorable; keep reading.
		}
	}

	if first != '#' {
		d.hasLeftover = true
		d.leftover = first
		d.lex.position--
		return nil
	}

	// Same '#' edge case as for the leftover: check for "#_".
	next, _, err := d.rd.ReadRune()
	switch {
	case err == io.EOF:
		return errNoneLeft
	case err != nil:
		return err
	case next != '_':
		// Not a discard: put the rune back and store '#' as leftover.
		d.leftover = '#'
		d.hasLeftover = true
		d.lex.position--
		return d.rd.UnreadRune()
	}
	return dropDiscarded()
}
// nextValueBytes reads exactly one value from the decoder and returns the raw
// bytes it spans, without interpreting them. Nesting is tracked with a
// tokenStack, so a collection is returned together with its contents.
// Whitespace and other ignorable input read before or inside the value is kept
// in the returned bytes. If the rune following the value had to be read to
// find the value's end, it is stored as the decoder's leftover and is not part
// of the result.
//
// If a token was previously handed back (d.undo), that token is taken as the
// first token of the value.
//
// errNoneLeft is returned when the input ends before the value is complete;
// reader, lexer and token stack errors are passed up. The returned bytes are
// only meaningful when the error is nil.
//
// This deliberately mirrors the token reading loop elsewhere in this file and
// uses the same lexer; the duplication is known and is a candidate for cleanup
// if performance becomes an issue.
func (d *Decoder) nextValueBytes() ([]byte, error) {
	// TODO: Ensure values inside maps come in pairs.
	tstack := newTokenStack()
	var val bytes.Buffer
	if d.undo {
		d.undo = false
		b := d.prevSlice
		tt := d.prevTtype
		d.prevSlice = nil
		d.prevTtype = tokenError
		if tt == tokenDiscard { // should be impossible to get a tokenDiscard here?
			return nil, errInternal
		}
		if err := tstack.push(tt); err != nil {
			return nil, err
		}
		if tstack.done() {
			// The handed-back token is the entire value. Nothing has been
			// written to val yet, so the token's own bytes are the result.
			return b, nil
		}
		val.Write(b)
	}
readElems:
	for {
		d.lex.reset()
		// Can't ignore whitespace in general. So I guess we just add it onto the buffer
		readWs := true
		if d.hasLeftover {
			// we can have leftover from previous iteration. e.g. "foo[bar]" will have
			// leftover "[" and "]"
			d.hasLeftover = false
			d.lex.position++
			val.WriteRune(d.leftover)
			switch d.lex.state(d.leftover) {
			case lexCont:
				// The leftover starts an element; go straight to reading it.
				readWs = false
			case lexEnd:
				err := tstack.push(d.lex.token)
				if err != nil || tstack.done() {
					return val.Bytes(), err
				}
				d.lex.reset()
			case lexEndPrev:
				return nil, errInternal
			case lexError:
				return nil, d.lex.err
			case lexIgnore:
				// just keep going
			}
		}
		if readWs {
		readWhitespace:
			// If we end up here, it means we expect at least one more token
			for {
				r, _, err := d.rd.ReadRune()
				if err == io.EOF {
					return nil, errNoneLeft
				}
				if err != nil {
					return nil, err
				}
				d.lex.position++
				val.WriteRune(r)
				switch d.lex.state(r) {
				case lexCont: // found something that looks like a value, so break out of whitespace loop
					break readWhitespace
				case lexError:
					return nil, d.lex.err
				case lexEnd:
					err := tstack.push(d.lex.token)
					if err != nil || tstack.done() {
						return val.Bytes(), err
					}
					// Here we'd usually continue on next iteration loop (which is safe
					// and valid), but since we know we don't have any leftovers, we can
					// just reset the lexer and keep attempting to read whitespace.
					d.lex.reset()
				case lexEndPrev:
					return nil, errInternal
				case lexIgnore:
					// keep on readin'
				}
			}
		}
		// read element
		for {
			r, _, err := d.rd.ReadRune()
			var ls lexState
			// Remember where this rune starts in val so that it can be removed
			// again if it turns out to belong to the next token.
			mark := val.Len()
			switch {
			case err == io.EOF:
				ls = d.lex.eof()
			case err != nil:
				return nil, err
			default:
				d.lex.position++
				val.WriteRune(r)
				ls = d.lex.state(r)
			}
			switch ls {
			case lexCont:
				// keep going
			case lexIgnore:
				if err != io.EOF {
					return nil, errInternal
				}
				return nil, errNoneLeft
			case lexEnd:
				pushErr := tstack.push(d.lex.token)
				if pushErr != nil || tstack.done() {
					return val.Bytes(), pushErr
				}
				if err == io.EOF {
					// Input ended while the value is still open.
					return nil, errNoneLeft
				}
				continue readElems
			case lexEndPrev: // if err == io.EOF then we cannot end up here. (Invariant forced by lexer)
				// Drop exactly what WriteRune appended for r. This can differ
				// from the size ReadRune reported when r stands in for an
				// invalid byte sequence.
				val.Truncate(mark)
				d.hasLeftover = true
				d.leftover = r
				pushErr := tstack.push(d.lex.token)
				if pushErr != nil || tstack.done() {
					return val.Bytes(), pushErr
				}
				continue readElems
			case lexError:
				return nil, d.lex.err
			}
		}
	}
}