2017-07-07 23:34:05 +02:00

111 lines
2.2 KiB
Go

package charset
import (
"encoding/binary"
"errors"
"unicode/utf8"
)
func init() {
registerClass("utf16", fromUTF16, toUTF16)
}
type translateFromUTF16 struct {
first bool
endian binary.ByteOrder
scratch []byte
}
func (p *translateFromUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
data = data[0 : len(data)&^1] // round to even number of bytes.
if len(data) < 2 {
return 0, nil, nil
}
n := 0
if p.first && p.endian == nil {
switch binary.BigEndian.Uint16(data) {
case 0xfeff:
p.endian = binary.BigEndian
data = data[2:]
n += 2
case 0xfffe:
p.endian = binary.LittleEndian
data = data[2:]
n += 2
default:
p.endian = guessEndian(data)
}
p.first = false
}
p.scratch = p.scratch[:0]
for ; len(data) > 0; data = data[2:] {
p.scratch = appendRune(p.scratch, rune(p.endian.Uint16(data)))
n += 2
}
return n, p.scratch, nil
}
func guessEndian(data []byte) binary.ByteOrder {
// XXX TODO
return binary.LittleEndian
}
type translateToUTF16 struct {
first bool
endian binary.ByteOrder
scratch []byte
}
func (p *translateToUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
p.scratch = ensureCap(p.scratch[:0], (len(data)+1)*2)
if p.first {
p.scratch = p.scratch[0:2]
p.endian.PutUint16(p.scratch, 0xfeff)
p.first = false
}
n := 0
for len(data) > 0 {
if !utf8.FullRune(data) && !eof {
break
}
r, size := utf8.DecodeRune(data)
// TODO if r > 65535?
slen := len(p.scratch)
p.scratch = p.scratch[0 : slen+2]
p.endian.PutUint16(p.scratch[slen:], uint16(r))
data = data[size:]
n += size
}
return n, p.scratch, nil
}
func getEndian(arg string) (binary.ByteOrder, error) {
switch arg {
case "le":
return binary.LittleEndian, nil
case "be":
return binary.BigEndian, nil
case "":
return nil, nil
}
return nil, errors.New("charset: unknown utf16 endianness")
}
func fromUTF16(arg string) (Translator, error) {
endian, err := getEndian(arg)
if err != nil {
return nil, err
}
return &translateFromUTF16{first: true, endian: endian}, nil
}
func toUTF16(arg string) (Translator, error) {
endian, err := getEndian(arg)
if err != nil {
return nil, err
}
return &translateToUTF16{first: false, endian: endian}, nil
}