604 lines
15 KiB
Go
Raw Normal View History

2016-11-01 14:10:05 -04:00
package jmespath
import (
"encoding/json"
"fmt"
"strconv"
"strings"
)
type astNodeType int
//go:generate stringer -type astNodeType
const (
ASTEmpty astNodeType = iota
ASTComparator
ASTCurrentNode
ASTExpRef
ASTFunctionExpression
ASTField
ASTFilterProjection
ASTFlatten
ASTIdentity
ASTIndex
ASTIndexExpression
ASTKeyValPair
ASTLiteral
ASTMultiSelectHash
ASTMultiSelectList
ASTOrExpression
ASTAndExpression
ASTNotExpression
ASTPipe
ASTProjection
ASTSubexpression
ASTSlice
ASTValueProjection
)
// ASTNode represents the abstract syntax tree of a JMESPath expression.
type ASTNode struct {
nodeType astNodeType
value interface{}
children []ASTNode
}
func (node ASTNode) String() string {
return node.PrettyPrint(0)
}
// PrettyPrint will pretty print the parsed AST.
// The AST is an implementation detail and this pretty print
// function is provided as a convenience method to help with
// debugging. You should not rely on its output as the internal
// structure of the AST may change at any time.
func (node ASTNode) PrettyPrint(indent int) string {
spaces := strings.Repeat(" ", indent)
output := fmt.Sprintf("%s%s {\n", spaces, node.nodeType)
nextIndent := indent + 2
if node.value != nil {
if converted, ok := node.value.(fmt.Stringer); ok {
// Account for things like comparator nodes
// that are enums with a String() method.
output += fmt.Sprintf("%svalue: %s\n", strings.Repeat(" ", nextIndent), converted.String())
} else {
output += fmt.Sprintf("%svalue: %#v\n", strings.Repeat(" ", nextIndent), node.value)
}
}
lastIndex := len(node.children)
if lastIndex > 0 {
output += fmt.Sprintf("%schildren: {\n", strings.Repeat(" ", nextIndent))
childIndent := nextIndent + 2
for _, elem := range node.children {
output += elem.PrettyPrint(childIndent)
}
}
output += fmt.Sprintf("%s}\n", spaces)
return output
}
var bindingPowers = map[tokType]int{
tEOF: 0,
tUnquotedIdentifier: 0,
tQuotedIdentifier: 0,
tRbracket: 0,
tRparen: 0,
tComma: 0,
tRbrace: 0,
tNumber: 0,
tCurrent: 0,
tExpref: 0,
tColon: 0,
tPipe: 1,
tOr: 2,
tAnd: 3,
tEQ: 5,
tLT: 5,
tLTE: 5,
tGT: 5,
tGTE: 5,
tNE: 5,
tFlatten: 9,
tStar: 20,
tFilter: 21,
tDot: 40,
tNot: 45,
tLbrace: 50,
tLbracket: 55,
tLparen: 60,
}
// Parser holds state about the current expression being parsed.
type Parser struct {
expression string
tokens []token
index int
}
// NewParser creates a new JMESPath parser.
func NewParser() *Parser {
p := Parser{}
return &p
}
// Parse will compile a JMESPath expression.
func (p *Parser) Parse(expression string) (ASTNode, error) {
lexer := NewLexer()
p.expression = expression
p.index = 0
tokens, err := lexer.tokenize(expression)
if err != nil {
return ASTNode{}, err
}
p.tokens = tokens
parsed, err := p.parseExpression(0)
if err != nil {
return ASTNode{}, err
}
if p.current() != tEOF {
return ASTNode{}, p.syntaxError(fmt.Sprintf(
"Unexpected token at the end of the expresssion: %s", p.current()))
}
return parsed, nil
}
func (p *Parser) parseExpression(bindingPower int) (ASTNode, error) {
var err error
leftToken := p.lookaheadToken(0)
p.advance()
leftNode, err := p.nud(leftToken)
if err != nil {
return ASTNode{}, err
}
currentToken := p.current()
for bindingPower < bindingPowers[currentToken] {
p.advance()
leftNode, err = p.led(currentToken, leftNode)
if err != nil {
return ASTNode{}, err
}
currentToken = p.current()
}
return leftNode, nil
}
func (p *Parser) parseIndexExpression() (ASTNode, error) {
if p.lookahead(0) == tColon || p.lookahead(1) == tColon {
return p.parseSliceExpression()
}
indexStr := p.lookaheadToken(0).value
parsedInt, err := strconv.Atoi(indexStr)
if err != nil {
return ASTNode{}, err
}
indexNode := ASTNode{nodeType: ASTIndex, value: parsedInt}
p.advance()
if err := p.match(tRbracket); err != nil {
return ASTNode{}, err
}
return indexNode, nil
}
func (p *Parser) parseSliceExpression() (ASTNode, error) {
parts := []*int{nil, nil, nil}
index := 0
current := p.current()
for current != tRbracket && index < 3 {
if current == tColon {
index++
p.advance()
} else if current == tNumber {
parsedInt, err := strconv.Atoi(p.lookaheadToken(0).value)
if err != nil {
return ASTNode{}, err
}
parts[index] = &parsedInt
p.advance()
} else {
return ASTNode{}, p.syntaxError(
"Expected tColon or tNumber" + ", received: " + p.current().String())
}
current = p.current()
}
if err := p.match(tRbracket); err != nil {
return ASTNode{}, err
}
return ASTNode{
nodeType: ASTSlice,
value: parts,
}, nil
}
func (p *Parser) match(tokenType tokType) error {
if p.current() == tokenType {
p.advance()
return nil
}
return p.syntaxError("Expected " + tokenType.String() + ", received: " + p.current().String())
}
func (p *Parser) led(tokenType tokType, node ASTNode) (ASTNode, error) {
switch tokenType {
case tDot:
if p.current() != tStar {
right, err := p.parseDotRHS(bindingPowers[tDot])
return ASTNode{
nodeType: ASTSubexpression,
children: []ASTNode{node, right},
}, err
}
p.advance()
right, err := p.parseProjectionRHS(bindingPowers[tDot])
return ASTNode{
nodeType: ASTValueProjection,
children: []ASTNode{node, right},
}, err
case tPipe:
right, err := p.parseExpression(bindingPowers[tPipe])
return ASTNode{nodeType: ASTPipe, children: []ASTNode{node, right}}, err
case tOr:
right, err := p.parseExpression(bindingPowers[tOr])
return ASTNode{nodeType: ASTOrExpression, children: []ASTNode{node, right}}, err
case tAnd:
right, err := p.parseExpression(bindingPowers[tAnd])
return ASTNode{nodeType: ASTAndExpression, children: []ASTNode{node, right}}, err
case tLparen:
name := node.value
var args []ASTNode
for p.current() != tRparen {
expression, err := p.parseExpression(0)
if err != nil {
return ASTNode{}, err
}
if p.current() == tComma {
if err := p.match(tComma); err != nil {
return ASTNode{}, err
}
}
args = append(args, expression)
}
if err := p.match(tRparen); err != nil {
return ASTNode{}, err
}
return ASTNode{
nodeType: ASTFunctionExpression,
value: name,
children: args,
}, nil
case tFilter:
return p.parseFilter(node)
case tFlatten:
left := ASTNode{nodeType: ASTFlatten, children: []ASTNode{node}}
right, err := p.parseProjectionRHS(bindingPowers[tFlatten])
return ASTNode{
nodeType: ASTProjection,
children: []ASTNode{left, right},
}, err
case tEQ, tNE, tGT, tGTE, tLT, tLTE:
right, err := p.parseExpression(bindingPowers[tokenType])
if err != nil {
return ASTNode{}, err
}
return ASTNode{
nodeType: ASTComparator,
value: tokenType,
children: []ASTNode{node, right},
}, nil
case tLbracket:
tokenType := p.current()
var right ASTNode
var err error
if tokenType == tNumber || tokenType == tColon {
right, err = p.parseIndexExpression()
if err != nil {
return ASTNode{}, err
}
return p.projectIfSlice(node, right)
}
// Otherwise this is a projection.
if err := p.match(tStar); err != nil {
return ASTNode{}, err
}
if err := p.match(tRbracket); err != nil {
return ASTNode{}, err
}
right, err = p.parseProjectionRHS(bindingPowers[tStar])
if err != nil {
return ASTNode{}, err
}
return ASTNode{
nodeType: ASTProjection,
children: []ASTNode{node, right},
}, nil
}
return ASTNode{}, p.syntaxError("Unexpected token: " + tokenType.String())
}
func (p *Parser) nud(token token) (ASTNode, error) {
switch token.tokenType {
case tJSONLiteral:
var parsed interface{}
err := json.Unmarshal([]byte(token.value), &parsed)
if err != nil {
return ASTNode{}, err
}
return ASTNode{nodeType: ASTLiteral, value: parsed}, nil
case tStringLiteral:
return ASTNode{nodeType: ASTLiteral, value: token.value}, nil
case tUnquotedIdentifier:
return ASTNode{
nodeType: ASTField,
value: token.value,
}, nil
case tQuotedIdentifier:
node := ASTNode{nodeType: ASTField, value: token.value}
if p.current() == tLparen {
return ASTNode{}, p.syntaxErrorToken("Can't have quoted identifier as function name.", token)
}
return node, nil
case tStar:
left := ASTNode{nodeType: ASTIdentity}
var right ASTNode
var err error
if p.current() == tRbracket {
right = ASTNode{nodeType: ASTIdentity}
} else {
right, err = p.parseProjectionRHS(bindingPowers[tStar])
}
return ASTNode{nodeType: ASTValueProjection, children: []ASTNode{left, right}}, err
case tFilter:
return p.parseFilter(ASTNode{nodeType: ASTIdentity})
case tLbrace:
return p.parseMultiSelectHash()
case tFlatten:
left := ASTNode{
nodeType: ASTFlatten,
children: []ASTNode{{nodeType: ASTIdentity}},
}
right, err := p.parseProjectionRHS(bindingPowers[tFlatten])
if err != nil {
return ASTNode{}, err
}
return ASTNode{nodeType: ASTProjection, children: []ASTNode{left, right}}, nil
case tLbracket:
tokenType := p.current()
//var right ASTNode
if tokenType == tNumber || tokenType == tColon {
right, err := p.parseIndexExpression()
if err != nil {
return ASTNode{}, nil
}
return p.projectIfSlice(ASTNode{nodeType: ASTIdentity}, right)
} else if tokenType == tStar && p.lookahead(1) == tRbracket {
p.advance()
p.advance()
right, err := p.parseProjectionRHS(bindingPowers[tStar])
if err != nil {
return ASTNode{}, err
}
return ASTNode{
nodeType: ASTProjection,
children: []ASTNode{{nodeType: ASTIdentity}, right},
}, nil
} else {
return p.parseMultiSelectList()
}
case tCurrent:
return ASTNode{nodeType: ASTCurrentNode}, nil
case tExpref:
expression, err := p.parseExpression(bindingPowers[tExpref])
if err != nil {
return ASTNode{}, err
}
return ASTNode{nodeType: ASTExpRef, children: []ASTNode{expression}}, nil
case tNot:
expression, err := p.parseExpression(bindingPowers[tNot])
if err != nil {
return ASTNode{}, err
}
return ASTNode{nodeType: ASTNotExpression, children: []ASTNode{expression}}, nil
case tLparen:
expression, err := p.parseExpression(0)
if err != nil {
return ASTNode{}, err
}
if err := p.match(tRparen); err != nil {
return ASTNode{}, err
}
return expression, nil
case tEOF:
return ASTNode{}, p.syntaxErrorToken("Incomplete expression", token)
}
return ASTNode{}, p.syntaxErrorToken("Invalid token: "+token.tokenType.String(), token)
}
func (p *Parser) parseMultiSelectList() (ASTNode, error) {
var expressions []ASTNode
for {
expression, err := p.parseExpression(0)
if err != nil {
return ASTNode{}, err
}
expressions = append(expressions, expression)
if p.current() == tRbracket {
break
}
err = p.match(tComma)
if err != nil {
return ASTNode{}, err
}
}
err := p.match(tRbracket)
if err != nil {
return ASTNode{}, err
}
return ASTNode{
nodeType: ASTMultiSelectList,
children: expressions,
}, nil
}
func (p *Parser) parseMultiSelectHash() (ASTNode, error) {
var children []ASTNode
for {
keyToken := p.lookaheadToken(0)
if err := p.match(tUnquotedIdentifier); err != nil {
if err := p.match(tQuotedIdentifier); err != nil {
return ASTNode{}, p.syntaxError("Expected tQuotedIdentifier or tUnquotedIdentifier")
}
}
keyName := keyToken.value
err := p.match(tColon)
if err != nil {
return ASTNode{}, err
}
value, err := p.parseExpression(0)
if err != nil {
return ASTNode{}, err
}
node := ASTNode{
nodeType: ASTKeyValPair,
value: keyName,
children: []ASTNode{value},
}
children = append(children, node)
if p.current() == tComma {
err := p.match(tComma)
if err != nil {
return ASTNode{}, nil
}
} else if p.current() == tRbrace {
err := p.match(tRbrace)
if err != nil {
return ASTNode{}, nil
}
break
}
}
return ASTNode{
nodeType: ASTMultiSelectHash,
children: children,
}, nil
}
func (p *Parser) projectIfSlice(left ASTNode, right ASTNode) (ASTNode, error) {
indexExpr := ASTNode{
nodeType: ASTIndexExpression,
children: []ASTNode{left, right},
}
if right.nodeType == ASTSlice {
right, err := p.parseProjectionRHS(bindingPowers[tStar])
return ASTNode{
nodeType: ASTProjection,
children: []ASTNode{indexExpr, right},
}, err
}
return indexExpr, nil
}
func (p *Parser) parseFilter(node ASTNode) (ASTNode, error) {
var right, condition ASTNode
var err error
condition, err = p.parseExpression(0)
if err != nil {
return ASTNode{}, err
}
if err := p.match(tRbracket); err != nil {
return ASTNode{}, err
}
if p.current() == tFlatten {
right = ASTNode{nodeType: ASTIdentity}
} else {
right, err = p.parseProjectionRHS(bindingPowers[tFilter])
if err != nil {
return ASTNode{}, err
}
}
return ASTNode{
nodeType: ASTFilterProjection,
children: []ASTNode{node, right, condition},
}, nil
}
func (p *Parser) parseDotRHS(bindingPower int) (ASTNode, error) {
lookahead := p.current()
if tokensOneOf([]tokType{tQuotedIdentifier, tUnquotedIdentifier, tStar}, lookahead) {
return p.parseExpression(bindingPower)
} else if lookahead == tLbracket {
if err := p.match(tLbracket); err != nil {
return ASTNode{}, err
}
return p.parseMultiSelectList()
} else if lookahead == tLbrace {
if err := p.match(tLbrace); err != nil {
return ASTNode{}, err
}
return p.parseMultiSelectHash()
}
return ASTNode{}, p.syntaxError("Expected identifier, lbracket, or lbrace")
}
func (p *Parser) parseProjectionRHS(bindingPower int) (ASTNode, error) {
current := p.current()
if bindingPowers[current] < 10 {
return ASTNode{nodeType: ASTIdentity}, nil
} else if current == tLbracket {
return p.parseExpression(bindingPower)
} else if current == tFilter {
return p.parseExpression(bindingPower)
} else if current == tDot {
err := p.match(tDot)
if err != nil {
return ASTNode{}, err
}
return p.parseDotRHS(bindingPower)
} else {
return ASTNode{}, p.syntaxError("Error")
}
}
func (p *Parser) lookahead(number int) tokType {
return p.lookaheadToken(number).tokenType
}
func (p *Parser) current() tokType {
return p.lookahead(0)
}
func (p *Parser) lookaheadToken(number int) token {
return p.tokens[p.index+number]
}
func (p *Parser) advance() {
p.index++
}
func tokensOneOf(elements []tokType, token tokType) bool {
for _, elem := range elements {
if elem == token {
return true
}
}
return false
}
func (p *Parser) syntaxError(msg string) SyntaxError {
return SyntaxError{
msg: msg,
Expression: p.expression,
Offset: p.lookaheadToken(0).position,
}
}
// Create a SyntaxError based on the provided token.
// This differs from syntaxError() which creates a SyntaxError
// based on the current lookahead token.
func (p *Parser) syntaxErrorToken(msg string, t token) SyntaxError {
return SyntaxError{
msg: msg,
Expression: p.expression,
Offset: t.position,
}
}