185 lines
4.4 KiB
Go
185 lines
4.4 KiB
Go
// Package iconv provides an interface to the GNU iconv character set
// conversion library (see http://www.gnu.org/software/libiconv/).
// It automatically registers all the character sets with the charset package,
// so it is usually imported purely for its side effects.
// Example:
//
//	import (
//		"github.com/paulrosania/go-charset/charset"
//		_ "github.com/paulrosania/go-charset/charset/iconv"
//	)
package iconv
|
|
|
|
//#cgo darwin LDFLAGS: -liconv
|
|
//#include <stdlib.h>
|
|
//#include <iconv.h>
|
|
//#include <errno.h>
|
|
//iconv_t iconv_open_error = (iconv_t)-1;
|
|
//size_t iconv_error = (size_t)-1;
|
|
import "C"
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"github.com/paulrosania/go-charset/charset"
|
|
"runtime"
|
|
"strings"
|
|
"syscall"
|
|
"unicode/utf8"
|
|
"unsafe"
|
|
)
|
|
|
|
type iconvTranslator struct {
|
|
cd C.iconv_t
|
|
invalid rune
|
|
scratch []byte
|
|
}
|
|
|
|
// canonicalChar maps the ASCII lower-case letters 'a'..'z' to their
// upper-case counterparts and returns every other rune untouched.
func canonicalChar(c rune) rune {
	const caseOffset = 'a' - 'A'
	if 'a' <= c && c <= 'z' {
		return c - caseOffset
	}
	return c
}
|
|
|
|
func canonicalName(s string) string {
|
|
return strings.Map(canonicalChar, s)
|
|
}
|
|
|
|
func init() {
|
|
charset.Register(iconvFactory{})
|
|
}
|
|
|
|
// iconvFactory is the stateless value registered with the charset package
// by init. Its methods build iconv-backed translators and report the
// character set names obtained from aliases().
type iconvFactory struct{}
|
|
|
|
func (iconvFactory) TranslatorFrom(name string) (charset.Translator, error) {
|
|
return Translator("UTF-8", name, utf8.RuneError)
|
|
}
|
|
|
|
func (iconvFactory) TranslatorTo(name string) (charset.Translator, error) {
|
|
// BUG This is wrong. The target character set may not be ASCII
|
|
// compatible. There's no easy solution to this other than
|
|
// removing the offending code point.
|
|
return Translator(name, "UTF-8", '?')
|
|
}
|
|
|
|
// Translator returns a Translator that translates between
|
|
// the named character sets. When an invalid multibyte
|
|
// character is found, the bytes in invalid are substituted instead.
|
|
func Translator(toCharset, fromCharset string, invalid rune) (charset.Translator, error) {
|
|
cto, cfrom := C.CString(toCharset), C.CString(fromCharset)
|
|
cd, err := C.iconv_open(cto, cfrom)
|
|
|
|
C.free(unsafe.Pointer(cfrom))
|
|
C.free(unsafe.Pointer(cto))
|
|
|
|
if cd == C.iconv_open_error {
|
|
if err == syscall.EINVAL {
|
|
return nil, errors.New("iconv: conversion not supported")
|
|
}
|
|
return nil, err
|
|
}
|
|
t := &iconvTranslator{cd: cd, invalid: invalid}
|
|
runtime.SetFinalizer(t, func(*iconvTranslator) {
|
|
C.iconv_close(cd)
|
|
})
|
|
return t, nil
|
|
}
|
|
|
|
func (iconvFactory) Names() []string {
|
|
all := aliases()
|
|
names := make([]string, 0, len(all))
|
|
for name, aliases := range all {
|
|
if aliases[0] == name {
|
|
names = append(names, name)
|
|
}
|
|
}
|
|
return names
|
|
}
|
|
|
|
func (iconvFactory) Info(name string) *charset.Charset {
|
|
name = strings.ToLower(name)
|
|
all := aliases()
|
|
a, ok := all[name]
|
|
if !ok {
|
|
return nil
|
|
}
|
|
return &charset.Charset{
|
|
Name: name,
|
|
Aliases: a,
|
|
}
|
|
}
|
|
|
|
func (p *iconvTranslator) Translate(data []byte, eof bool) (rn int, rd []byte, rerr error) {
|
|
n := 0
|
|
p.scratch = p.scratch[:0]
|
|
for len(data) > 0 {
|
|
p.scratch = ensureCap(p.scratch, len(p.scratch)+len(data)*utf8.UTFMax)
|
|
cData := (*C.char)(unsafe.Pointer(&data[:1][0]))
|
|
nData := C.size_t(len(data))
|
|
|
|
ns := len(p.scratch)
|
|
cScratch := (*C.char)(unsafe.Pointer(&p.scratch[ns : ns+1][0]))
|
|
nScratch := C.size_t(cap(p.scratch) - ns)
|
|
r, err := C.iconv(p.cd, &cData, &nData, &cScratch, &nScratch)
|
|
|
|
p.scratch = p.scratch[0 : cap(p.scratch)-int(nScratch)]
|
|
n += len(data) - int(nData)
|
|
data = data[len(data)-int(nData):]
|
|
|
|
if r != C.iconv_error || err == nil {
|
|
return n, p.scratch, nil
|
|
}
|
|
switch err := err.(syscall.Errno); err {
|
|
case C.EILSEQ:
|
|
// invalid multibyte sequence - skip one byte and continue
|
|
p.scratch = appendRune(p.scratch, p.invalid)
|
|
n++
|
|
data = data[1:]
|
|
case C.EINVAL:
|
|
// incomplete multibyte sequence
|
|
return n, p.scratch, nil
|
|
case C.E2BIG:
|
|
// output buffer not large enough; try again with larger buffer.
|
|
p.scratch = ensureCap(p.scratch, cap(p.scratch)+utf8.UTFMax)
|
|
default:
|
|
panic(fmt.Sprintf("unexpected error code: %v", err))
|
|
}
|
|
}
|
|
return n, p.scratch, nil
|
|
}
|
|
|
|
// ensureCap guarantees room for at least n bytes. When s already has that
// capacity it is returned as is; otherwise the contents of s are copied
// into a freshly allocated slice of the same length and a larger capacity.
//
// A slice with zero capacity grows straight to n. Any other slice grows
// from its current capacity by doubling while below 1024 and by a quarter
// from then on, until n is reached.
func ensureCap(s []byte, n int) []byte {
	have := cap(s)
	if have >= n {
		return s
	}

	// Growth policy adapted from appendslice1 in the runtime.
	want := n
	if have != 0 {
		want = have
		for want < n {
			if want < 1024 {
				want *= 2
			} else {
				want += want / 4
			}
		}
	}

	grown := make([]byte, len(s), want)
	copy(grown, s)
	return grown
}
|
|
|
|
func appendRune(buf []byte, r rune) []byte {
|
|
n := len(buf)
|
|
buf = ensureCap(buf, n+utf8.UTFMax)
|
|
nu := utf8.EncodeRune(buf[n:n+utf8.UTFMax], r)
|
|
return buf[0 : n+nu]
|
|
}
|