mirror of
https://github.com/status-im/matterbridge.git
synced 2025-01-16 01:14:12 +00:00
522 lines
9.9 KiB
Go
522 lines
9.9 KiB
Go
package dataurl
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
type item struct {
|
|
t itemType
|
|
val string
|
|
}
|
|
|
|
func (i item) String() string {
|
|
switch i.t {
|
|
case itemEOF:
|
|
return "EOF"
|
|
case itemError:
|
|
return i.val
|
|
}
|
|
if len(i.val) > 10 {
|
|
return fmt.Sprintf("%.10q...", i.val)
|
|
}
|
|
return fmt.Sprintf("%q", i.val)
|
|
}
|
|
|
|
// itemType identifies the kind of a lexed item.
type itemType int
|
|
|
|
const (
|
|
itemError itemType = iota
|
|
itemEOF
|
|
|
|
itemDataPrefix
|
|
|
|
itemMediaType
|
|
itemMediaSep
|
|
itemMediaSubType
|
|
itemParamSemicolon
|
|
itemParamAttr
|
|
itemParamEqual
|
|
itemLeftStringQuote
|
|
itemRightStringQuote
|
|
itemParamVal
|
|
|
|
itemBase64Enc
|
|
|
|
itemDataComma
|
|
itemData
|
|
)
|
|
|
|
// eof is the sentinel rune returned by the lexer once the input is exhausted.
const eof = rune(-1)
|
|
|
|
func isTokenRune(r rune) bool {
|
|
return r <= unicode.MaxASCII &&
|
|
!unicode.IsControl(r) &&
|
|
!unicode.IsSpace(r) &&
|
|
!isTSpecialRune(r)
|
|
}
|
|
|
|
// isTSpecialRune reports whether r is one of the "tspecials" delimiter
// characters, which are not allowed inside a token.
func isTSpecialRune(r rune) bool {
	switch r {
	case '(', ')', '<', '>', '@',
		',', ';', ':', '\\', '"',
		'/', '[', ']', '?', '=':
		return true
	}
	return false
}
|
|
|
|
// isDiscreteType reports whether s is exactly one of the discrete top-level
// media types: text, image, audio, video or application.
//
// See http://tools.ietf.org/html/rfc2045
// This doesn't include the extension-token case,
// as it's handled separately.
//
// The comparison is an exact match: a string that merely starts with one of
// these names (for example "textfoo") is not a discrete type.
func isDiscreteType(s string) bool {
	switch s {
	case "text", "image", "audio", "video", "application":
		return true
	}
	return false
}
|
|
|
|
// isCompositeType reports whether s is exactly one of the composite
// top-level media types: message or multipart.
//
// See http://tools.ietf.org/html/rfc2045
// This doesn't include the extension-token case,
// as it's handled separately.
//
// The comparison is an exact match: a string that merely starts with one of
// these names (for example "messages") is not a composite type.
func isCompositeType(s string) bool {
	switch s {
	case "message", "multipart":
		return true
	}
	return false
}
|
|
|
|
// isURLCharRune reports whether r is acceptable in the (non-base64) data
// part of a data URL.
//
// The check is deliberately permissive about '%': it is accepted here and
// the validity of percent-escapes is left to URL unescaping later in the
// parser.
func isURLCharRune(r rune) bool {
	if r > unicode.MaxASCII {
		return false
	}
	// control characters
	if (r >= 0x00 && r <= 0x1F) || r == 0x7F {
		return false
	}
	switch r {
	case ' ', '<', '>', '#', '"': // delims
		return false
	case '{', '}', '|', '\\', '^', '[', ']', '`': // unwise
		return false
	}
	return true
}
|
|
|
|
// isBase64Rune reports whether r may appear in base64-encoded data: the
// standard alphabet letters and digits, '+', '/', the '=' padding
// character, and a newline.
func isBase64Rune(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
		return true
	case r == '+', r == '/', r == '=', r == '\n':
		return true
	default:
		return false
	}
}
|
|
|
|
// stateFn is one state of the lexer: it scans some input and returns the
// state to run next, or nil to stop the state machine.
type stateFn func(*lexer) stateFn
|
|
|
|
// lexer lexes the data URL scheme input string.
|
|
// The implementation is from the text/template/parser package.
|
|
type lexer struct {
|
|
input string
|
|
start int
|
|
pos int
|
|
width int
|
|
seenBase64Item bool
|
|
items chan item
|
|
}
|
|
|
|
func (l *lexer) run() {
|
|
for state := lexBeforeDataPrefix; state != nil; {
|
|
state = state(l)
|
|
}
|
|
close(l.items)
|
|
}
|
|
|
|
func (l *lexer) emit(t itemType) {
|
|
l.items <- item{t, l.input[l.start:l.pos]}
|
|
l.start = l.pos
|
|
}
|
|
|
|
func (l *lexer) next() (r rune) {
|
|
if l.pos >= len(l.input) {
|
|
l.width = 0
|
|
return eof
|
|
}
|
|
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
|
|
l.pos += l.width
|
|
return r
|
|
}
|
|
|
|
func (l *lexer) backup() {
|
|
l.pos -= l.width
|
|
}
|
|
|
|
func (l *lexer) ignore() {
|
|
l.start = l.pos
|
|
}
|
|
|
|
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
|
|
l.items <- item{itemError, fmt.Sprintf(format, args...)}
|
|
return nil
|
|
}
|
|
|
|
func lex(input string) *lexer {
|
|
l := &lexer{
|
|
input: input,
|
|
items: make(chan item),
|
|
}
|
|
go l.run() // Concurrently run state machine.
|
|
return l
|
|
}
|
|
|
|
// Fixed pieces of data URL syntax recognized by the lexer.
const (
	dataPrefix     = "data:" // mandatory scheme prefix
	mediaSep       = '/'     // separates media type from subtype
	paramSemicolon = ';'     // introduces a parameter
	paramEqual     = '='     // separates a parameter attribute from its value
	dataComma      = ','     // precedes the data
)
|
|
|
|
// start lexing by detecting data prefix
|
|
func lexBeforeDataPrefix(l *lexer) stateFn {
|
|
if strings.HasPrefix(l.input[l.pos:], dataPrefix) {
|
|
return lexDataPrefix
|
|
}
|
|
return l.errorf("missing data prefix")
|
|
}
|
|
|
|
// lex data prefix
|
|
func lexDataPrefix(l *lexer) stateFn {
|
|
l.pos += len(dataPrefix)
|
|
l.emit(itemDataPrefix)
|
|
return lexAfterDataPrefix
|
|
}
|
|
|
|
// lex what's after data prefix.
|
|
// it can be the media type/subtype separator,
|
|
// the base64 encoding, or the comma preceding the data
|
|
func lexAfterDataPrefix(l *lexer) stateFn {
|
|
switch r := l.next(); {
|
|
case r == paramSemicolon:
|
|
l.backup()
|
|
return lexParamSemicolon
|
|
case r == dataComma:
|
|
l.backup()
|
|
return lexDataComma
|
|
case r == eof:
|
|
return l.errorf("missing comma before data")
|
|
case r == 'x' || r == 'X':
|
|
if l.next() == '-' {
|
|
return lexXTokenMediaType
|
|
}
|
|
return lexInDiscreteMediaType
|
|
case isTokenRune(r):
|
|
return lexInDiscreteMediaType
|
|
default:
|
|
return l.errorf("invalid character after data prefix")
|
|
}
|
|
}
|
|
|
|
func lexXTokenMediaType(l *lexer) stateFn {
|
|
for {
|
|
switch r := l.next(); {
|
|
case r == mediaSep:
|
|
l.backup()
|
|
return lexMediaType
|
|
case r == eof:
|
|
return l.errorf("missing media type slash")
|
|
case isTokenRune(r):
|
|
default:
|
|
return l.errorf("invalid character for media type")
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexInDiscreteMediaType(l *lexer) stateFn {
|
|
for {
|
|
switch r := l.next(); {
|
|
case r == mediaSep:
|
|
l.backup()
|
|
// check it's valid discrete type
|
|
if !isDiscreteType(l.input[l.start:l.pos]) &&
|
|
!isCompositeType(l.input[l.start:l.pos]) {
|
|
return l.errorf("invalid media type")
|
|
}
|
|
return lexMediaType
|
|
case r == eof:
|
|
return l.errorf("missing media type slash")
|
|
case isTokenRune(r):
|
|
default:
|
|
return l.errorf("invalid character for media type")
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexMediaType(l *lexer) stateFn {
|
|
if l.pos > l.start {
|
|
l.emit(itemMediaType)
|
|
}
|
|
return lexMediaSep
|
|
}
|
|
|
|
func lexMediaSep(l *lexer) stateFn {
|
|
l.next()
|
|
l.emit(itemMediaSep)
|
|
return lexAfterMediaSep
|
|
}
|
|
|
|
func lexAfterMediaSep(l *lexer) stateFn {
|
|
for {
|
|
switch r := l.next(); {
|
|
case r == paramSemicolon || r == dataComma:
|
|
l.backup()
|
|
return lexMediaSubType
|
|
case r == eof:
|
|
return l.errorf("incomplete media type")
|
|
case isTokenRune(r):
|
|
default:
|
|
return l.errorf("invalid character for media subtype")
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexMediaSubType(l *lexer) stateFn {
|
|
if l.pos > l.start {
|
|
l.emit(itemMediaSubType)
|
|
}
|
|
return lexAfterMediaSubType
|
|
}
|
|
|
|
func lexAfterMediaSubType(l *lexer) stateFn {
|
|
switch r := l.next(); {
|
|
case r == paramSemicolon:
|
|
l.backup()
|
|
return lexParamSemicolon
|
|
case r == dataComma:
|
|
l.backup()
|
|
return lexDataComma
|
|
case r == eof:
|
|
return l.errorf("missing comma before data")
|
|
default:
|
|
return l.errorf("expected semicolon or comma")
|
|
}
|
|
}
|
|
|
|
func lexParamSemicolon(l *lexer) stateFn {
|
|
l.next()
|
|
l.emit(itemParamSemicolon)
|
|
return lexAfterParamSemicolon
|
|
}
|
|
|
|
func lexAfterParamSemicolon(l *lexer) stateFn {
|
|
switch r := l.next(); {
|
|
case r == eof:
|
|
return l.errorf("unterminated parameter sequence")
|
|
case r == paramEqual || r == dataComma:
|
|
return l.errorf("unterminated parameter sequence")
|
|
case isTokenRune(r):
|
|
l.backup()
|
|
return lexInParamAttr
|
|
default:
|
|
return l.errorf("invalid character for parameter attribute")
|
|
}
|
|
}
|
|
|
|
func lexBase64Enc(l *lexer) stateFn {
|
|
if l.pos > l.start {
|
|
if v := l.input[l.start:l.pos]; v != "base64" {
|
|
return l.errorf("expected base64, got %s", v)
|
|
}
|
|
l.seenBase64Item = true
|
|
l.emit(itemBase64Enc)
|
|
}
|
|
return lexDataComma
|
|
}
|
|
|
|
func lexInParamAttr(l *lexer) stateFn {
|
|
for {
|
|
switch r := l.next(); {
|
|
case r == paramEqual:
|
|
l.backup()
|
|
return lexParamAttr
|
|
case r == dataComma:
|
|
l.backup()
|
|
return lexBase64Enc
|
|
case r == eof:
|
|
return l.errorf("unterminated parameter sequence")
|
|
case isTokenRune(r):
|
|
default:
|
|
return l.errorf("invalid character for parameter attribute")
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexParamAttr(l *lexer) stateFn {
|
|
if l.pos > l.start {
|
|
l.emit(itemParamAttr)
|
|
}
|
|
return lexParamEqual
|
|
}
|
|
|
|
func lexParamEqual(l *lexer) stateFn {
|
|
l.next()
|
|
l.emit(itemParamEqual)
|
|
return lexAfterParamEqual
|
|
}
|
|
|
|
func lexAfterParamEqual(l *lexer) stateFn {
|
|
switch r := l.next(); {
|
|
case r == '"':
|
|
l.emit(itemLeftStringQuote)
|
|
return lexInQuotedStringParamVal
|
|
case r == eof:
|
|
return l.errorf("missing comma before data")
|
|
case isTokenRune(r):
|
|
return lexInParamVal
|
|
default:
|
|
return l.errorf("invalid character for parameter value")
|
|
}
|
|
}
|
|
|
|
func lexInQuotedStringParamVal(l *lexer) stateFn {
|
|
for {
|
|
switch r := l.next(); {
|
|
case r == eof:
|
|
return l.errorf("unclosed quoted string")
|
|
case r == '\\':
|
|
return lexEscapedChar
|
|
case r == '"':
|
|
l.backup()
|
|
return lexQuotedStringParamVal
|
|
case r <= unicode.MaxASCII:
|
|
default:
|
|
return l.errorf("invalid character for parameter value")
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexEscapedChar(l *lexer) stateFn {
|
|
switch r := l.next(); {
|
|
case r <= unicode.MaxASCII:
|
|
return lexInQuotedStringParamVal
|
|
case r == eof:
|
|
return l.errorf("unexpected eof")
|
|
default:
|
|
return l.errorf("invalid escaped character")
|
|
}
|
|
}
|
|
|
|
func lexInParamVal(l *lexer) stateFn {
|
|
for {
|
|
switch r := l.next(); {
|
|
case r == paramSemicolon || r == dataComma:
|
|
l.backup()
|
|
return lexParamVal
|
|
case r == eof:
|
|
return l.errorf("missing comma before data")
|
|
case isTokenRune(r):
|
|
default:
|
|
return l.errorf("invalid character for parameter value")
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexQuotedStringParamVal(l *lexer) stateFn {
|
|
if l.pos > l.start {
|
|
l.emit(itemParamVal)
|
|
}
|
|
l.next()
|
|
l.emit(itemRightStringQuote)
|
|
return lexAfterParamVal
|
|
}
|
|
|
|
func lexParamVal(l *lexer) stateFn {
|
|
if l.pos > l.start {
|
|
l.emit(itemParamVal)
|
|
}
|
|
return lexAfterParamVal
|
|
}
|
|
|
|
func lexAfterParamVal(l *lexer) stateFn {
|
|
switch r := l.next(); {
|
|
case r == paramSemicolon:
|
|
l.backup()
|
|
return lexParamSemicolon
|
|
case r == dataComma:
|
|
l.backup()
|
|
return lexDataComma
|
|
case r == eof:
|
|
return l.errorf("missing comma before data")
|
|
default:
|
|
return l.errorf("expected semicolon or comma")
|
|
}
|
|
}
|
|
|
|
func lexDataComma(l *lexer) stateFn {
|
|
l.next()
|
|
l.emit(itemDataComma)
|
|
if l.seenBase64Item {
|
|
return lexBase64Data
|
|
}
|
|
return lexData
|
|
}
|
|
|
|
func lexData(l *lexer) stateFn {
|
|
Loop:
|
|
for {
|
|
switch r := l.next(); {
|
|
case r == eof:
|
|
break Loop
|
|
case isURLCharRune(r):
|
|
default:
|
|
return l.errorf("invalid data character")
|
|
}
|
|
}
|
|
if l.pos > l.start {
|
|
l.emit(itemData)
|
|
}
|
|
l.emit(itemEOF)
|
|
return nil
|
|
}
|
|
|
|
func lexBase64Data(l *lexer) stateFn {
|
|
Loop:
|
|
for {
|
|
switch r := l.next(); {
|
|
case r == eof:
|
|
break Loop
|
|
case isBase64Rune(r):
|
|
default:
|
|
return l.errorf("invalid data character")
|
|
}
|
|
}
|
|
if l.pos > l.start {
|
|
l.emit(itemData)
|
|
}
|
|
l.emit(itemEOF)
|
|
return nil
|
|
}
|