status-go/vendor/github.com/segmentio/encoding/json/parse.go

package json

import (
	"bytes"
	"encoding/binary"
	"math"
	"math/bits"
	"reflect"
	"unicode"
	"unicode/utf16"
	"unicode/utf8"

	"github.com/segmentio/encoding/ascii"
)

// Whitespace characters allowed between tokens by the JSON specification:
// space, horizontal tab, line feed and carriage return.
const (
	sp = ' '
	ht = '\t'
	nl = '\n'
	cr = '\r'
)

// Characters with a structural role in JSON strings.
const (
	escape = '\\' // backslash
	quote  = '"'  // double quote
)

// internalParseFlags inspects b, ignoring leading and trailing JSON
// whitespace, and reports which properties hold for the trimmed input:
// validAsciiPrint is set when ascii.ValidPrint accepts it, and noBackslash is
// set when it contains no '\' byte.
func internalParseFlags(b []byte) (flags ParseFlags) {
	// Surrounding whitespace is not part of the value and is not considered.
	trimmed := trimTrailingSpaces(skipSpaces(b))

	if ascii.ValidPrint(trimmed) {
		flags |= validAsciiPrint
	}

	if bytes.IndexByte(trimmed, '\\') < 0 {
		flags |= noBackslash
	}

	return flags
}

// skipSpaces returns b without its leading JSON whitespace.
//
// The first byte is tested inline so that skipSpacesN is only called when b
// starts with a byte that could be whitespace.
func skipSpaces(b []byte) []byte {
	if len(b) == 0 || b[0] > 0x20 {
		return b
	}
	rest, _ := skipSpacesN(b)
	return rest
}

// skipSpacesN scans past the leading JSON whitespace of b and returns the
// remaining bytes along with the number of whitespace bytes that were skipped.
//
// When b is empty or consists only of whitespace the result is (nil, 0).
func skipSpacesN(b []byte) ([]byte, int) {
	for n, c := range b {
		if c != sp && c != ht && c != nl && c != cr {
			return b[n:], n
		}
	}
	return nil, 0
}

// trimTrailingSpaces returns b without its trailing JSON whitespace.
//
// The last byte is tested inline so that trimTrailingSpacesN is only called
// when b ends with a byte that could be whitespace.
func trimTrailingSpaces(b []byte) []byte {
	if len(b) == 0 || b[len(b)-1] > 0x20 {
		return b
	}
	return trimTrailingSpacesN(b)
}

// trimTrailingSpacesN returns the prefix of b that ends at its last
// non-whitespace byte. The result is empty when b contains only whitespace.
func trimTrailingSpacesN(b []byte) []byte {
	end := len(b)
	for end > 0 {
		c := b[end-1]
		if c != sp && c != ht && c != nl && c != cr {
			break
		}
		end--
	}
	return b[:end]
}

// parseInt parses a decimal representation of an int64 from the beginning of
// b and returns the value along with the bytes that follow it.
//
// The function is equivalent to calling strconv.ParseInt(string(b), 10, 64) but
// it prevents Go from making a memory allocation for converting a byte slice to
// a string (escape analysis fails due to the error returned by strconv.ParseInt).
//
// Because it only works with base 10 the function is also significantly faster
// than strconv.ParseInt.
//
// An error is returned for empty input, a lone '-', a leading '0' followed by
// another digit, input that does not start with a digit (d.inputError), values
// outside of the int64 range (unmarshalOverflow), and digits followed by '.',
// 'e' or 'E', which indicate a float (unmarshalTypeError). The type t is only
// used to build those errors.
func (d decoder) parseInt(b []byte, t reflect.Type) (int64, []byte, error) {
	var value int64
	var count int

	if len(b) == 0 {
		return 0, b, syntaxError(b, "cannot decode integer from an empty input")
	}

	if b[0] == '-' {
		const max = math.MinInt64
		const lim = max / 10

		if len(b) == 1 {
			return 0, b, syntaxError(b, "cannot decode integer from '-'")
		}

		if len(b) > 2 && b[1] == '0' && '0' <= b[2] && b[2] <= '9' {
			return 0, b, syntaxError(b, "invalid leading character '0' in integer")
		}

		// The value is accumulated as a negative number so that
		// math.MinInt64, which has no positive counterpart, can be parsed.
		for _, c := range b[1:] {
			if !(c >= '0' && c <= '9') {
				if count == 0 {
					b, err := d.inputError(b, t)
					return 0, b, err
				}
				break
			}

			if value < lim {
				return 0, b, unmarshalOverflow(b, t)
			}

			value *= 10
			x := int64(c - '0')

			if value < (max + x) {
				return 0, b, unmarshalOverflow(b, t)
			}

			value -= x
			count++
		}

		count++ // account for the sign
	} else {
		const max = math.MaxInt64
		const lim = max / 10

		if len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' {
			return 0, b, syntaxError(b, "invalid leading character '0' in integer")
		}

		for ; count < len(b) && b[count] >= '0' && b[count] <= '9'; count++ {
			// Both limits are checked before doing the arithmetic so that it
			// can never wrap around. Comparing the result with the previous
			// value afterwards is not enough: "25000000000000000000" wraps to
			// a positive number that is larger than the previous value.
			if value > lim {
				return 0, b, unmarshalOverflow(b, t)
			}

			value *= 10
			x := int64(b[count] - '0')

			if value > (max - x) {
				return 0, b, unmarshalOverflow(b, t)
			}

			value += x
		}

		if count == 0 {
			b, err := d.inputError(b, t)
			return 0, b, err
		}
	}

	if count < len(b) {
		switch b[count] {
		case '.', 'e', 'E': // was this actually a float?
			v, r, _, err := d.parseNumber(b)
			if err != nil {
				// Not a valid number either: report the integer part and
				// the offending character.
				v, r = b[:count+1], b[count+1:]
			}
			return 0, r, unmarshalTypeError(v, t)
		}
	}

	return value, b[count:], nil
}

// parseUint is like parseInt but for unsigned integers: it parses a decimal
// representation of a uint64 from the beginning of b and returns the value
// along with the bytes that follow it.
//
// An error is returned for empty input, a leading '0' followed by another
// digit, input that does not start with a digit (d.inputError), values larger
// than math.MaxUint64 (unmarshalOverflow), and digits followed by '.', 'e' or
// 'E', which indicate a float (unmarshalTypeError). The type t is only used to
// build those errors.
func (d decoder) parseUint(b []byte, t reflect.Type) (uint64, []byte, error) {
	const max = math.MaxUint64
	const lim = max / 10

	var value uint64
	var count int

	if len(b) == 0 {
		return 0, b, syntaxError(b, "cannot decode integer value from an empty input")
	}

	if len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' {
		return 0, b, syntaxError(b, "invalid leading character '0' in integer")
	}

	for ; count < len(b) && b[count] >= '0' && b[count] <= '9'; count++ {
		// Both limits are checked before doing the arithmetic so that it can
		// never wrap around. Comparing the result with the previous value
		// afterwards is not enough: "46116860184273879040" wraps to 1<<63,
		// which is larger than the previous value.
		if value > lim {
			return 0, b, unmarshalOverflow(b, t)
		}

		value *= 10
		x := uint64(b[count] - '0')

		if value > (max - x) {
			return 0, b, unmarshalOverflow(b, t)
		}

		value += x
	}

	if count == 0 {
		b, err := d.inputError(b, t)
		return 0, b, err
	}

	if count < len(b) {
		switch b[count] {
		case '.', 'e', 'E': // was this actually a float?
			v, r, _, err := d.parseNumber(b)
			if err != nil {
				// Not a valid number either: report the integer part and
				// the offending character.
				v, r = b[:count+1], b[count+1:]
			}
			return 0, r, unmarshalTypeError(v, t)
		}
	}

	return value, b[count:], nil
}

// parseUintHex parses a hexadecimal representation of a uint64 from the
// beginning of b, accepting both upper and lower case digits, and returns the
// value along with the bytes that follow the last digit.
//
// The function is equivalent to calling strconv.ParseUint(string(b), 16, 64) but
// it prevents Go from making a memory allocation for converting a byte slice to
// a string (escape analysis fails due to the error returned by strconv.ParseUint).
//
// Because it only works with base 16 the function is also significantly faster
// than strconv.ParseUint.
//
// A syntax error is returned when b is empty, when its first byte is not a
// hexadecimal digit, or when the value does not fit in 64 bits.
func (d decoder) parseUintHex(b []byte) (uint64, []byte, error) {
	const max = math.MaxUint64
	const lim = max / 0x10

	if len(b) == 0 {
		return 0, b, syntaxError(b, "cannot decode hexadecimal value from an empty input")
	}

	var value uint64
	n := 0

	for n < len(b) {
		c := b[n]

		var digit uint64
		switch {
		case '0' <= c && c <= '9':
			digit = uint64(c - '0')
		case 'A' <= c && c <= 'F':
			digit = uint64(c-'A') + 0xA
		case 'a' <= c && c <= 'f':
			digit = uint64(c-'a') + 0xA
		default:
			// The number ends at the first byte that is not a digit, unless
			// there was no digit at all.
			if n == 0 {
				return 0, b, syntaxError(b, "expected hexadecimal digit but found '%c'", c)
			}
			return value, b[n:], nil
		}

		if value > lim {
			return 0, b, syntaxError(b, "hexadecimal value out of range")
		}

		value *= 0x10
		if value > (max - digit) {
			return 0, b, syntaxError(b, "hexadecimal value out of range")
		}

		value += digit
		n++
	}

	return value, b[n:], nil
}

// parseNull consumes the literal "null" from the beginning of b.
//
// On success it returns the bytes of the literal, the remainder of b and the
// Null kind. Input of fewer than four bytes is reported as an unexpected EOF
// with an empty remainder; any other input is a syntax error and b is returned
// unchanged as the remainder.
func (d decoder) parseNull(b []byte) ([]byte, []byte, Kind, error) {
	const size = len("null")

	switch {
	case hasNullPrefix(b):
		return b[:size], b[size:], Null, nil
	case len(b) < size:
		return nil, b[len(b):], Undefined, unexpectedEOF(b)
	default:
		return nil, b, Undefined, syntaxError(b, "expected 'null' but found invalid token")
	}
}

// parseTrue consumes the literal "true" from the beginning of b.
//
// On success it returns the bytes of the literal, the remainder of b and the
// True kind. Input of fewer than four bytes is reported as an unexpected EOF
// with an empty remainder; any other input is a syntax error and b is returned
// unchanged as the remainder.
func (d decoder) parseTrue(b []byte) ([]byte, []byte, Kind, error) {
	const size = len("true")

	switch {
	case hasTruePrefix(b):
		return b[:size], b[size:], True, nil
	case len(b) < size:
		return nil, b[len(b):], Undefined, unexpectedEOF(b)
	default:
		return nil, b, Undefined, syntaxError(b, "expected 'true' but found invalid token")
	}
}

// parseFalse consumes the literal "false" from the beginning of b.
//
// On success it returns the bytes of the literal, the remainder of b and the
// False kind. Input of fewer than five bytes is reported as an unexpected EOF
// with an empty remainder; any other input is a syntax error and b is returned
// unchanged as the remainder.
func (d decoder) parseFalse(b []byte) ([]byte, []byte, Kind, error) {
	const size = len("false")

	switch {
	case hasFalsePrefix(b):
		return b[:size], b[size:], False, nil
	case len(b) < size:
		return nil, b[len(b):], Undefined, unexpectedEOF(b)
	default:
		return nil, b, Undefined, syntaxError(b, "expected 'false' but found invalid token")
	}
}

// parseNumber scans a JSON number at the beginning of b.
//
// On success v holds the bytes of the number, r the bytes that follow it, and
// kind is Uint for a non-negative integer, Int for a negative integer, or
// Float when a fraction or an exponent is present. On failure v is nil, err
// describes the problem and r starts where scanning stopped.
func (d decoder) parseNumber(b []byte) (v, r []byte, kind Kind, err error) {
	if len(b) == 0 {
		r, err = b, unexpectedEOF(b)
		return
	}

	// Assume it's an unsigned integer at first.
	kind = Uint

	i := 0
	// sign
	if b[i] == '-' {
		kind = Int
		i++
	}

	if i == len(b) {
		r, err = b[i:], syntaxError(b, "missing number value after sign")
		return
	}

	if b[i] < '0' || b[i] > '9' {
		r, err = b[i:], syntaxError(b, "expected digit but got '%c'", b[i])
		return
	}

	// integer part
	if b[i] == '0' {
		i++
		if i == len(b) {
			v, r = b[:i], b[i:]
			return
		}
		switch c := b[i]; {
		case '0' <= c && c <= '9':
			// This has to be tested before accepting a lone zero, otherwise
			// input such as "01" is split into "0" followed by "1" and the
			// leading zero is never reported.
			r, err = b[i:], syntaxError(b, "cannot decode number with leading '0' character")
			return
		case c != '.' && c != 'e' && c != 'E':
			// A zero that is followed by neither a fraction nor an exponent
			// is a complete number.
			v, r = b[:i], b[i:]
			return
		}
	}

	for i < len(b) && '0' <= b[i] && b[i] <= '9' {
		i++
	}

	// decimal part
	if i < len(b) && b[i] == '.' {
		kind = Float
		i++
		decimalStart := i

		for i < len(b) {
			if c := b[i]; !('0' <= c && c <= '9') {
				if i == decimalStart {
					r, err = b[i:], syntaxError(b, "expected digit but found '%c'", c)
					return
				}
				break
			}
			i++
		}

		// The input ended right after the '.'.
		if i == decimalStart {
			r, err = b[i:], syntaxError(b, "expected decimal part after '.'")
			return
		}
	}

	// exponent part
	if i < len(b) && (b[i] == 'e' || b[i] == 'E') {
		kind = Float
		i++

		// optional sign of the exponent
		if i < len(b) {
			if c := b[i]; c == '+' || c == '-' {
				i++
			}
		}

		if i == len(b) {
			r, err = b[i:], syntaxError(b, "missing exponent in number")
			return
		}

		exponentStart := i

		for i < len(b) {
			if c := b[i]; !('0' <= c && c <= '9') {
				if i == exponentStart {
					// Set r like every other error path does so the caller
					// always gets the position where scanning stopped.
					r, err = b[i:], syntaxError(b, "expected digit but found '%c'", c)
					return
				}
				break
			}
			i++
		}
	}

	v, r = b[:i], b[i:]
	return
}

// parseUnicode decodes the four hexadecimal digits of a \uXXXX escape sequence
// found at the beginning of b (the "\u" prefix must already be consumed).
//
// It returns the code unit as a rune and the number of bytes it looked at:
// four, or len(b) when b is shorter than that. A syntax error is returned when
// b is too short or when the first four bytes are not all hexadecimal digits.
func (d decoder) parseUnicode(b []byte) (rune, int, error) {
	const width = 4

	if len(b) < width {
		return 0, len(b), syntaxError(b, "unicode code point must have at least 4 characters")
	}

	code, rest, err := d.parseUintHex(b[:width])
	switch {
	case err != nil:
		return 0, width, syntaxError(b, "parsing unicode code point: %s", err)
	case len(rest) != 0:
		// parseUintHex stopped before the fourth byte, so one of the bytes
		// is not a hexadecimal digit.
		return 0, width, syntaxError(b, "invalid unicode code point")
	}

	return rune(code), width, nil
}

// parseString scans a JSON string at the beginning of b.
//
// On success it returns the string including its surrounding quotes and the
// bytes that follow it. The kind is Unescaped when the string contains no
// backslash and passes ascii.ValidPrint, and String otherwise; in the latter
// case the escape sequences have been validated but not decoded.
//
// On failure the first result is nil, the kind is Undefined and the error
// describes the problem.
func (d decoder) parseString(b []byte) ([]byte, []byte, Kind, error) {
	if len(b) < 2 {
		return nil, b[len(b):], Undefined, unexpectedEOF(b)
	}
	if b[0] != '"' {
		return nil, b, Undefined, syntaxError(b, "expected '\"' at the beginning of a string value")
	}

	// n is the length of the candidate string, quotes included, assuming that
	// the first '"' found after the opening quote is the closing one.
	var n int
	if len(b) >= 9 {
		// This is an optimization for short strings. We read 8/16 bytes,
		// and XOR each with 0x22 (") so that these bytes (and only
		// these bytes) are now zero. We use the hasless(u,1) trick
		// from https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
		// to determine whether any bytes are zero. Finally, we CTZ
		// to find the index of that byte.
		const mask1 = 0x2222222222222222
		const mask2 = 0x0101010101010101
		const mask3 = 0x8080808080808080
		u := binary.LittleEndian.Uint64(b[1:]) ^ mask1
		if mask := (u - mask2) & ^u & mask3; mask != 0 {
			// The quote is at index TrailingZeros64(mask)/8 of b[1:]; adding
			// 2 turns that index into a length of b that includes the quote.
			n = bits.TrailingZeros64(mask)/8 + 2
			goto found
		}
		if len(b) >= 17 {
			// Same test on the next 8 bytes, which start at b[9].
			u = binary.LittleEndian.Uint64(b[9:]) ^ mask1
			if mask := (u - mask2) & ^u & mask3; mask != 0 {
				n = bits.TrailingZeros64(mask)/8 + 10
				goto found
			}
		}
	}
	// IndexByte returns -1 when there is no quote, which makes n equal to 1.
	n = bytes.IndexByte(b[1:], '"') + 2
	if n <= 1 {
		return nil, b[len(b):], Undefined, syntaxError(b, "missing '\"' at the end of a string value")
	}
found:
	// Fast path: without a backslash the quote that was found cannot be
	// escaped, so it ends the string. Each scan is skipped when the matching
	// decoder flag is set (presumably computed for the whole input by
	// internalParseFlags).
	if (d.flags.has(noBackslash) || bytes.IndexByte(b[1:n], '\\') < 0) &&
		(d.flags.has(validAsciiPrint) || ascii.ValidPrint(b[1:n])) {
		return b[:n], b[n:], Unescaped, nil
	}

	// Slow path: scan again from the opening quote, validating the escape
	// sequences and looking for the first quote that is not escaped.
	for i := 1; i < len(b); i++ {
		switch b[i] {
		case '\\':
			if i++; i < len(b) {
				switch b[i] {
				case '"', '\\', '/', 'n', 'r', 't', 'f', 'b':
				case 'u':
					// This n shadows the outer one; it is the number of
					// bytes parseUnicode looked at after the 'u'.
					_, n, err := d.parseUnicode(b[i+1:])
					if err != nil {
						return nil, b[i+1+n:], Undefined, err
					}
					i += n
				default:
					return nil, b, Undefined, syntaxError(b, "invalid character '%c' in string escape code", b[i])
				}
			}

		case '"':
			return b[:i+1], b[i+1:], String, nil

		default:
			// Bytes below 0x20 are rejected inside a string.
			if b[i] < 0x20 {
				return nil, b, Undefined, syntaxError(b, "invalid character '%c' in string escape code", b[i])
			}
		}
	}

	return nil, b[len(b):], Undefined, syntaxError(b, "missing '\"' at the end of a string value")
}

// parseStringUnquote parses a JSON string at the beginning of b and returns
// its content without the surrounding quotes and with the escape sequences
// decoded.
//
// The results are the content of the string, the input that follows the
// string, a boolean and an error. When parseString reports the string as
// Unescaped the content is a sub-slice of the input and the boolean is false.
// Otherwise the content is appended to r, which is allocated when nil, and
// the boolean is true on success.
func (d decoder) parseStringUnquote(b []byte, r []byte) ([]byte, []byte, bool, error) {
	s, b, k, err := d.parseString(b)
	if err != nil {
		return s, b, false, err
	}

	s = s[1 : len(s)-1] // trim the quotes

	// Nothing to decode: the bytes of the input are the value.
	if k == Unescaped {
		return s, b, false, nil
	}

	if r == nil {
		r = make([]byte, 0, len(s))
	}

	for len(s) != 0 {
		i := bytes.IndexByte(s, '\\')

		// No escape sequence left: copy the rest of the string.
		if i < 0 {
			r = appendCoerceInvalidUTF8(r, s)
			break
		}

		// Copy the bytes that precede the backslash, then skip it.
		r = appendCoerceInvalidUTF8(r, s[:i])
		s = s[i+1:]

		// NOTE(review): indexing s[0] relies on parseString having validated
		// that a byte follows every backslash.
		c := s[0]
		switch c {
		case '"', '\\', '/':
			// simple escaped character
		case 'n':
			c = '\n'

		case 'r':
			c = '\r'

		case 't':
			c = '\t'

		case 'b':
			c = '\b'

		case 'f':
			c = '\f'

		case 'u':
			s = s[1:]

			r1, n1, err := d.parseUnicode(s)
			if err != nil {
				return r, b, true, err
			}
			s = s[n1:]

			// A surrogate has to be combined with the \uXXXX escape that
			// follows it to form a rune.
			if utf16.IsSurrogate(r1) {
				if !hasPrefix(s, `\u`) {
					r1 = unicode.ReplacementChar
				} else {
					r2, n2, err := d.parseUnicode(s[2:])
					if err != nil {
						return r, b, true, err
					}
					// The second escape is only consumed when the pair is
					// valid; otherwise the replacement character is emitted
					// and the escape is decoded by the next iteration.
					if r1 = utf16.DecodeRune(r1, r2); r1 != unicode.ReplacementChar {
						s = s[2+n2:]
					}
				}
			}

			r = appendRune(r, r1)
			continue

		default: // not sure what this escape sequence is
			return r, b, false, syntaxError(s, "invalid character '%c' in string escape code", c)
		}

		// Single character escapes: emit the decoded byte and skip the
		// character that followed the backslash.
		r = append(r, c)
		s = s[1:]
	}

	return r, b, true, nil
}

// appendRune appends the UTF-8 encoding of r to b and returns the extended
// slice.
func appendRune(b []byte, r rune) []byte {
	var pad [utf8.UTFMax]byte

	// Reserve room for the longest possible encoding, write the rune in
	// place, then drop the bytes that were not needed.
	start := len(b)
	b = append(b, pad[:]...)
	size := utf8.EncodeRune(b[start:], r)
	return b[:start+size]
}

// appendCoerceInvalidUTF8 appends s to b, replacing every byte of s that is
// not part of a valid UTF-8 sequence with the encoding of utf8.RuneError, and
// returns the extended slice.
func appendCoerceInvalidUTF8(b []byte, s []byte) []byte {
	var enc [utf8.UTFMax]byte

	for len(s) > 0 {
		// DecodeRune yields (utf8.RuneError, 1) for an invalid byte.
		r, size := utf8.DecodeRune(s)
		s = s[size:]
		b = append(b, enc[:utf8.EncodeRune(enc[:], r)]...)
	}

	return b
}

// parseObject scans a JSON object at the beginning of b, checking its syntax
// without decoding its members.
//
// On success it returns the bytes of the object from '{' to the matching '}',
// the bytes that follow it, and the Object kind. On failure the first result
// is nil, the kind is Undefined and the error describes the problem.
func (d decoder) parseObject(b []byte) ([]byte, []byte, Kind, error) {
	if len(b) < 2 {
		return nil, b[len(b):], Undefined, unexpectedEOF(b)
	}

	if b[0] != '{' {
		return nil, b, Undefined, syntaxError(b, "expected '{' at the beginning of an object value")
	}

	var err error
	var a = b      // the whole input, used to slice the result
	var n = len(b) // n - len(b) is the offset of b within a
	var i = 0      // number of members parsed so far

	b = b[1:]
	for {
		b = skipSpaces(b)

		if len(b) == 0 {
			return nil, b, Undefined, syntaxError(b, "cannot decode object from empty input")
		}

		if b[0] == '}' {
			j := (n - len(b)) + 1
			return a[:j], a[j:], Object, nil
		}

		// Every member but the first must be preceded by a comma. b is known
		// to be non-empty at this point.
		if i != 0 {
			if b[0] != ',' {
				return nil, b, Undefined, syntaxError(b, "expected ',' after object field value but found '%c'", b[0])
			}
			b = skipSpaces(b[1:])
			if len(b) == 0 {
				return nil, b, Undefined, unexpectedEOF(b)
			}
			if b[0] == '}' {
				return nil, b, Undefined, syntaxError(b, "unexpected trailing comma after object field")
			}
		}

		// key
		_, b, _, err = d.parseString(b)
		if err != nil {
			return nil, b, Undefined, err
		}
		b = skipSpaces(b)

		if len(b) == 0 {
			return nil, b, Undefined, syntaxError(b, "unexpected EOF after object field key")
		}
		if b[0] != ':' {
			return nil, b, Undefined, syntaxError(b, "expected ':' after object field key but found '%c'", b[0])
		}
		b = skipSpaces(b[1:])

		// value
		_, b, _, err = d.parseValue(b)
		if err != nil {
			return nil, b, Undefined, err
		}

		i++
	}
}

// parseArray scans a JSON array at the beginning of b, checking its syntax
// without decoding its elements.
//
// On success it returns the bytes of the array from '[' to the matching ']',
// the bytes that follow it, and the Array kind. On failure the first result is
// nil, the kind is Undefined and the error describes the problem.
func (d decoder) parseArray(b []byte) ([]byte, []byte, Kind, error) {
	if len(b) < 2 {
		return nil, b[len(b):], Undefined, unexpectedEOF(b)
	}

	if b[0] != '[' {
		return nil, b, Undefined, syntaxError(b, "expected '[' at the beginning of array value")
	}

	var err error
	var a = b      // the whole input, used to slice the result
	var n = len(b) // n - len(b) is the offset of b within a
	var i = 0      // number of elements parsed so far

	b = b[1:]
	for {
		b = skipSpaces(b)

		if len(b) == 0 {
			return nil, b, Undefined, syntaxError(b, "missing closing ']' after array value")
		}

		if b[0] == ']' {
			j := (n - len(b)) + 1
			return a[:j], a[j:], Array, nil
		}

		// Every element but the first must be preceded by a comma. b is known
		// to be non-empty at this point.
		if i != 0 {
			if b[0] != ',' {
				return nil, b, Undefined, syntaxError(b, "expected ',' after array element but found '%c'", b[0])
			}
			b = skipSpaces(b[1:])
			if len(b) == 0 {
				return nil, b, Undefined, unexpectedEOF(b)
			}
			if b[0] == ']' {
				return nil, b, Undefined, syntaxError(b, "unexpected trailing comma after array element")
			}
		}

		_, b, _, err = d.parseValue(b)
		if err != nil {
			return nil, b, Undefined, err
		}

		i++
	}
}

// parseValue scans the JSON value at the beginning of b, dispatching on its
// first byte to the parser of the matching kind of value.
//
// It returns the results of that parser: the bytes of the value, the bytes
// that follow it, its kind and an error. A syntax error is returned when b is
// empty or when its first byte cannot start a JSON value; in both cases b is
// returned unchanged as the remainder.
func (d decoder) parseValue(b []byte) ([]byte, []byte, Kind, error) {
	if len(b) == 0 {
		return nil, b, Undefined, syntaxError(b, "unexpected end of JSON input")
	}

	var v []byte
	var k Kind
	var err error

	switch b[0] {
	case '{':
		v, b, k, err = d.parseObject(b)
	case '[':
		v, b, k, err = d.parseArray(b)
	case '"':
		v, b, k, err = d.parseString(b)
	case 'n':
		v, b, k, err = d.parseNull(b)
	case 't':
		v, b, k, err = d.parseTrue(b)
	case 'f':
		v, b, k, err = d.parseFalse(b)
	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		v, b, k, err = d.parseNumber(b)
	default:
		err = syntaxError(b, "invalid character '%c' looking for beginning of value", b[0])
	}

	return v, b, k, err
}

// hasNullPrefix reports whether b starts with the literal "null".
func hasNullPrefix(b []byte) bool {
	const literal = "null"

	if len(b) < len(literal) {
		return false
	}
	return string(b[:len(literal)]) == literal
}

// hasTruePrefix reports whether b starts with the literal "true".
func hasTruePrefix(b []byte) bool {
	const literal = "true"

	if len(b) < len(literal) {
		return false
	}
	return string(b[:len(literal)]) == literal
}

// hasFalsePrefix reports whether b starts with the literal "false".
func hasFalsePrefix(b []byte) bool {
	const literal = "false"

	if len(b) < len(literal) {
		return false
	}
	return string(b[:len(literal)]) == literal
}

// hasPrefix reports whether the byte slice b starts with the string s.
func hasPrefix(b []byte, s string) bool {
	if len(b) < len(s) {
		return false
	}
	return string(b[:len(s)]) == s
}

// hasLeadingSign reports whether b starts with a '+' or a '-'.
func hasLeadingSign(b []byte) bool {
	if len(b) == 0 {
		return false
	}
	switch b[0] {
	case '+', '-':
		return true
	}
	return false
}

// hasLeadingZeroes reports whether b, ignoring an optional leading sign,
// starts with a '0' that is immediately followed by another digit.
func hasLeadingZeroes(b []byte) bool {
	digits := b
	if hasLeadingSign(digits) {
		digits = digits[1:]
	}

	if len(digits) < 2 || digits[0] != '0' {
		return false
	}
	return '0' <= digits[1] && digits[1] <= '9'
}

// appendToLower appends a case-folded copy of s to b and returns the extended
// slice.
//
// When ascii.Valid accepts s only the bytes 'A' to 'Z' are changed, to their
// lower case counterpart. Otherwise s is processed rune by rune and each rune
// is replaced by the result of foldRune.
func appendToLower(b, s []byte) []byte {
	if !ascii.Valid(s) {
		for _, r := range string(s) {
			b = appendRune(b, foldRune(r))
		}
		return b
	}

	// Fast path for ascii strings: the bytes found between two upper case
	// letters are copied with a single append.
	start := 0
	for pos, c := range s {
		if c < 'A' || c > 'Z' {
			continue
		}
		b = append(b, s[start:pos]...)
		b = append(b, c+('a'-'A'))
		start = pos + 1
	}

	return append(b, s[start:]...)
}

// foldRune returns unicode.SimpleFold(r), except that a result in the range
// 'A' to 'Z' is converted to the matching lower case ASCII letter.
func foldRune(r rune) rune {
	folded := unicode.SimpleFold(r)
	if folded >= 'A' && folded <= 'Z' {
		return folded + ('a' - 'A')
	}
	return folded
}