package json

import (
	"strconv"
	"sync"
	"unsafe"
)

// Tokenizer is an iterator-style type which can be used to progressively parse
// through a json input.
//
// Tokenizing json is useful to build highly efficient parsing operations, for
// example when doing transformations on-the-fly as the program reads the
// input and produces the transformed json to an output buffer.
//
// Here is a common pattern to use a tokenizer:
//
//	for t := json.NewTokenizer(b); t.Next(); {
//		switch k := t.Kind(); k.Class() {
//		case json.Null:
//			...
//		case json.Bool:
//			...
//		case json.Num:
//			...
//		case json.String:
//			...
//		case json.Array:
//			...
//		case json.Object:
//			...
//		}
//	}
//
type Tokenizer struct {
	// When the tokenizer is positioned on a json delimiter this field is not
	// zero. In this case the possible values are '{', '}', '[', ']', ':', and
	// ','.
	Delim Delim

	// This field contains the raw json token that the tokenizer is pointing at.
	// When Delim is not zero, this field is a single-element byte slice
	// containing the delimiter value. Otherwise, this field holds values like
	// null, true, false, numbers, or quoted strings.
	Value RawValue

	// When the tokenizer has encountered invalid content this field is not nil.
	Err error

	// When the value is in an array or an object, this field contains the depth
	// at which it was found.
	Depth int

	// When the value is in an array or an object, this field contains the
	// position at which it was found.
	Index int

	// This field is true when the value is the key of an object.
	IsKey bool

	// Tells whether the next value read from the tokenizer is a key.
	isKey bool

	// json input for the tokenizer, pointing at data right after the last token
	// that was parsed.
	json []byte

	// Stack used to track entering and leaving arrays, objects, and keys.
	stack *stack

	// Decoder used for parsing.
	decoder
}

// NewTokenizer constructs a new Tokenizer which reads its json input from b.
func NewTokenizer(b []byte) *Tokenizer {
	return &Tokenizer{
		json:    b,
		decoder: decoder{flags: internalParseFlags(b)},
	}
}
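
// countValuesByClass is a small illustrative sketch, not part of the tokenizer
// API: following the usage pattern documented on Tokenizer above, it walks the
// input with Next and tallies the scalar values (null, booleans, numbers, and
// strings) by the class of their Kind, skipping the structural delimiters.
func countValuesByClass(b []byte) (map[Kind]int, error) {
	counts := make(map[Kind]int)
	t := NewTokenizer(b)
	for t.Next() {
		if t.Delim != 0 {
			continue // '{', '}', '[', ']', ':' and ',' are not counted
		}
		counts[t.Kind().Class()]++
	}
	// Next reports false both at end of input and on malformed json; t.Err
	// tells the two cases apart.
	return counts, t.Err
}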

// Reset erases the state of t and re-initializes it with the json input from b.
func (t *Tokenizer) Reset(b []byte) {
	if t.stack != nil {
		releaseStack(t.stack)
	}
	// This code is similar to:
	//
	//	*t = Tokenizer{json: b}
	//
	// However, it does not compile down to an invocation of duff-copy.
	t.Delim = 0
	t.Value = nil
	t.Err = nil
	t.Depth = 0
	t.Index = 0
	t.IsKey = false
	t.isKey = false
	t.json = b
	t.stack = nil
	t.decoder = decoder{flags: internalParseFlags(b)}
}
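
// tokenizeAll is a usage sketch (hypothetical helper, not part of the original
// file) showing the intent of Reset: a single Tokenizer can be reused across
// many json documents instead of allocating a new tokenizer per input.
func tokenizeAll(inputs [][]byte, visit func(*Tokenizer)) error {
	t := NewTokenizer(nil)
	for _, b := range inputs {
		t.Reset(b)
		for t.Next() {
			visit(t)
		}
		if t.Err != nil {
			return t.Err
		}
	}
	return nil
}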

// Next advances the tokenizer to the next token of the json input, returning
// true if a token was read, or false if the end of the input has been reached.
//
// If the tokenizer encounters malformed json while reading the input the method
// sets t.Err to an error describing the issue, and returns false. Once an error
// has been encountered, the tokenizer will always fail until its input is
// cleared by a call to its Reset method.
func (t *Tokenizer) Next() bool {
	if t.Err != nil {
		return false
	}

	// Inlined code of the skipSpaces function; this gives a ~15% speed boost.
	i := 0
skipLoop:
	for _, c := range t.json {
		switch c {
		case sp, ht, nl, cr:
			i++
		default:
			break skipLoop
		}
	}

	if i > 0 {
		t.json = t.json[i:]
	}

	if len(t.json) == 0 {
		t.Reset(nil)
		return false
	}

	var kind Kind
	switch t.json[0] {
	case '"':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseString(t.json)
	case 'n':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseNull(t.json)
	case 't':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseTrue(t.json)
	case 'f':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseFalse(t.json)
	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseNumber(t.json)
	case '{', '}', '[', ']', ':', ',':
		t.Delim, t.Value, t.json = Delim(t.json[0]), t.json[:1], t.json[1:]
		switch t.Delim {
		case '{':
			kind = Object
		case '[':
			kind = Array
		}
	default:
		t.Delim = 0
		t.Value, t.json, t.Err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0])
	}

	t.Depth = t.depth()
	t.Index = t.index()
	t.flags = t.flags.withKind(kind)

	if t.Delim == 0 {
		t.IsKey = t.isKey
	} else {
		t.IsKey = false

		switch t.Delim {
		case '{':
			t.isKey = true
			t.push(inObject)
		case '[':
			t.push(inArray)
		case '}':
			t.Err = t.pop(inObject)
			t.Depth--
			t.Index = t.index()
		case ']':
			t.Err = t.pop(inArray)
			t.Depth--
			t.Index = t.index()
		case ':':
			t.isKey = false
		case ',':
			if t.stack == nil || len(t.stack.state) == 0 {
				t.Err = syntaxError(t.json, "found unexpected comma")
				return false
			}
			if t.stack.is(inObject) {
				t.isKey = true
			}
			t.stack.state[len(t.stack.state)-1].len++
		}
	}

	return (t.Delim != 0 || len(t.Value) != 0) && t.Err == nil
}
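
// tokenTrace is an illustrative sketch, not part of the original file: it shows
// how the exported Delim, Value, Depth, and Index fields relate as Next walks
// the input, producing one human-readable line per token.
func tokenTrace(b []byte) ([]string, error) {
	var trace []string
	t := NewTokenizer(b)
	for t.Next() {
		label := "value"
		if t.Delim != 0 {
			label = "delim"
		}
		trace = append(trace, label+" "+string(t.Value)+
			" depth="+strconv.Itoa(t.Depth)+
			" index="+strconv.Itoa(t.Index))
	}
	return trace, t.Err
}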

func (t *Tokenizer) depth() int {
	if t.stack == nil {
		return 0
	}
	return t.stack.depth()
}

func (t *Tokenizer) index() int {
	if t.stack == nil {
		return 0
	}
	return t.stack.index()
}

func (t *Tokenizer) push(typ scope) {
	if t.stack == nil {
		t.stack = acquireStack()
	}
	t.stack.push(typ)
}

func (t *Tokenizer) pop(expect scope) error {
	if t.stack == nil || !t.stack.pop(expect) {
		return syntaxError(t.json, "found unexpected character while tokenizing json input")
	}
	return nil
}

// Kind returns the kind of the value that the tokenizer is currently positioned
// on.
func (t *Tokenizer) Kind() Kind { return t.flags.kind() }

// Bool returns a bool containing the value of the json boolean that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on a boolean, the behavior is undefined.
func (t *Tokenizer) Bool() bool { return t.flags.kind() == True }

// Int returns an int64 containing the value of the json number that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on an integer, the behavior is undefined.
func (t *Tokenizer) Int() int64 {
	i, _, _ := t.parseInt(t.Value, int64Type)
	return i
}

// Uint returns a uint64 containing the value of the json number that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on a positive integer, the behavior is
// undefined.
func (t *Tokenizer) Uint() uint64 {
	u, _, _ := t.parseUint(t.Value, uint64Type)
	return u
}

// Float returns a float64 containing the value of the json number that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on a number, the behavior is undefined.
func (t *Tokenizer) Float() float64 {
	// The unsafe conversion reads t.Value as a string without copying it.
	f, _ := strconv.ParseFloat(*(*string)(unsafe.Pointer(&t.Value)), 64)
	return f
}

// String returns a byte slice containing the value of the json string that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// When possible, the returned byte slice references the backing array of the
// tokenizer. A new slice is only allocated if the tokenizer needed to unescape
// the json string.
//
// If the tokenizer is not positioned on a string, the behavior is undefined.
func (t *Tokenizer) String() []byte {
	if t.flags.kind() == Unescaped && len(t.Value) > 1 {
		return t.Value[1 : len(t.Value)-1] // unquote
	}
	s, _, _, _ := t.parseStringUnquote(t.Value, nil)
	return s
}
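
// topLevelStringFields is an illustrative sketch, not part of the original
// file: it pairs the IsKey field with the typed accessors above to collect the
// string-valued fields of a top-level json object, checking Kind before calling
// String as the accessor documentation requires.
func topLevelStringFields(b []byte) (map[string]string, error) {
	fields := make(map[string]string)
	key := ""
	t := NewTokenizer(b)
	for t.Next() {
		if t.Delim != 0 || t.Depth != 1 {
			continue // only keys and values directly inside the outer object
		}
		if t.Kind().Class() != String {
			key = "" // non-string value: drop the pending key
			continue
		}
		if t.IsKey {
			key = string(t.String())
			continue
		}
		if key != "" {
			fields[key] = string(t.String())
			key = ""
		}
	}
	return fields, t.Err
}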

// RawValue represents a raw json value; it is intended to carry null, true,
// false, number, and string values only.
type RawValue []byte

// String returns true if v contains a string value.
func (v RawValue) String() bool { return len(v) != 0 && v[0] == '"' }

// Null returns true if v contains a null value.
func (v RawValue) Null() bool { return len(v) != 0 && v[0] == 'n' }

// True returns true if v contains a true value.
func (v RawValue) True() bool { return len(v) != 0 && v[0] == 't' }

// False returns true if v contains a false value.
func (v RawValue) False() bool { return len(v) != 0 && v[0] == 'f' }

// Number returns true if v contains a number value.
func (v RawValue) Number() bool {
	if len(v) != 0 {
		switch v[0] {
		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return true
		}
	}
	return false
}

// AppendUnquote writes the unquoted version of the string value in v into b.
func (v RawValue) AppendUnquote(b []byte) []byte {
	d := decoder{}
	s, r, _, err := d.parseStringUnquote(v, b)
	if err != nil {
		panic(err)
	}
	if len(r) != 0 {
		panic(syntaxError(r, "unexpected trailing tokens after json value"))
	}
	return append(b, s...)
}

// Unquote returns the unquoted version of the string value in v.
func (v RawValue) Unquote() []byte {
	return v.AppendUnquote(nil)
}
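
// describeRawValue is an illustrative sketch, not part of the original file: it
// shows how a RawValue captured from Tokenizer.Value can be inspected later with
// the predicate methods above, which only look at the first byte, and how
// Unquote materializes the contents of a string value.
func describeRawValue(v RawValue) string {
	switch {
	case v.Null():
		return "null"
	case v.True(), v.False():
		return "boolean"
	case v.Number():
		return "number"
	case v.String():
		return "string: " + string(v.Unquote())
	default:
		return "not a raw scalar value"
	}
}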

type scope int

const (
	inArray scope = iota
	inObject
)

type state struct {
	typ scope
	len int
}

type stack struct {
	state []state
}

func (s *stack) push(typ scope) {
	s.state = append(s.state, state{typ: typ, len: 1})
}

func (s *stack) pop(expect scope) bool {
	i := len(s.state) - 1

	if i < 0 {
		return false
	}

	if found := s.state[i]; expect != found.typ {
		return false
	}

	s.state = s.state[:i]
	return true
}

func (s *stack) is(typ scope) bool {
	return len(s.state) != 0 && s.state[len(s.state)-1].typ == typ
}

func (s *stack) depth() int {
	return len(s.state)
}

func (s *stack) index() int {
	if len(s.state) == 0 {
		return 0
	}
	return s.state[len(s.state)-1].len - 1
}

func acquireStack() *stack {
	s, _ := stackPool.Get().(*stack)
	if s == nil {
		s = &stack{state: make([]state, 0, 4)}
	} else {
		s.state = s.state[:0]
	}
	return s
}

func releaseStack(s *stack) {
	stackPool.Put(s)
}

var (
	stackPool sync.Pool // *stack
)