134 lines
2.7 KiB
Go
Raw Normal View History

2017-07-07 23:34:05 +02:00
package charset
import (
"fmt"
"unicode/utf8"
)
func init() {
registerClass("cp", fromCodePage, toCodePage)
}
type translateFromCodePage struct {
byte2rune *[256]rune
scratch []byte
}
type cpKeyFrom string
type cpKeyTo string
func (p *translateFromCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
p.scratch = ensureCap(p.scratch, len(data)*utf8.UTFMax)[:0]
buf := p.scratch
for _, x := range data {
r := p.byte2rune[x]
if r < utf8.RuneSelf {
buf = append(buf, byte(r))
continue
}
size := utf8.EncodeRune(buf[len(buf):cap(buf)], r)
buf = buf[0 : len(buf)+size]
}
return len(data), buf, nil
}
type toCodePageInfo struct {
rune2byte map[rune]byte
// same gives the number of runes at start of code page that map exactly to
// unicode.
same rune
}
type translateToCodePage struct {
toCodePageInfo
scratch []byte
}
func (p *translateToCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
p.scratch = ensureCap(p.scratch, len(data))
buf := p.scratch[:0]
for i := 0; i < len(data); {
r := rune(data[i])
size := 1
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRune(data[i:])
if size == 1 && !eof && !utf8.FullRune(data[i:]) {
return i, buf, nil
}
}
var b byte
if r < p.same {
b = byte(r)
} else {
var ok bool
b, ok = p.rune2byte[r]
if !ok {
b = '?'
}
}
buf = append(buf, b)
i += size
}
return len(data), buf, nil
}
func fromCodePage(arg string) (Translator, error) {
runes, err := cache(cpKeyFrom(arg), func() (interface{}, error) {
data, err := readFile(arg)
if err != nil {
return nil, err
}
runes := []rune(string(data))
if len(runes) != 256 {
return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, len(runes))
}
r := new([256]rune)
copy(r[:], runes)
return r, nil
})
if err != nil {
return nil, err
}
return &translateFromCodePage{byte2rune: runes.(*[256]rune)}, nil
}
func toCodePage(arg string) (Translator, error) {
m, err := cache(cpKeyTo(arg), func() (interface{}, error) {
data, err := readFile(arg)
if err != nil {
return nil, err
}
info := toCodePageInfo{
rune2byte: make(map[rune]byte),
same: 256,
}
atStart := true
i := rune(0)
for _, r := range string(data) {
if atStart {
if r == i {
i++
continue
}
info.same = i
atStart = false
}
info.rune2byte[r] = byte(i)
i++
}
// TODO fix tables
// fmt.Printf("%s, same = %d\n", arg, info.same)
if i != 256 {
return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, i)
}
return info, nil
})
if err != nil {
return nil, err
}
return &translateToCodePage{toCodePageInfo: m.(toCodePageInfo)}, nil
}