mobile/bind/seq/string.go

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package seq

import "unicode/utf16"

// Based heavily on package unicode/utf16 from the Go standard library.

// Rune limits consulted by UTF16Encode when deciding whether a rune can
// be encoded as-is or must be substituted.
const (
	replacementChar = '\uFFFD'     // Unicode replacement character, emitted in place of unencodable runes.
	maxRune         = '\U0010FFFF' // Maximum valid Unicode code point.
)

// Boundaries of the UTF-16 surrogate ranges.
const (
	// Code units in [surr1, surr2) (0xd800-0xdbff) encode the high 10 bits of a pair.
	// Code units in [surr2, surr3) (0xdc00-0xdfff) encode the low 10 bits of a pair.
	// The encoded code point is those 20 bits plus surrSelf (0x10000).
	surr1 = 0xd800
	surr2 = 0xdc00
	surr3 = 0xe000

	// surrSelf is the first code point that needs a surrogate pair;
	// UTF16Encode writes anything smaller as a single code unit.
	surrSelf = 0x10000
)

// UTF16Encode writes the UTF-16 encoding of s into chars and returns the
// number of uint16 code units written.
//
// The caller must provide a chars slice long enough to hold the whole
// encoding (at most two code units per rune of s); the function indexes
// chars directly and does not grow it, so writing past its end panics.
func UTF16Encode(s string, chars []uint16) int {
	written := 0
	for _, r := range s {
		// Anything UTF-16 cannot represent (negative values, surrogate
		// halves, code points beyond maxRune) is replaced by U+FFFD.
		// Ranging over a string already yields U+FFFD for invalid UTF-8,
		// so this is a defensive guard.
		if r < 0 || (surr1 <= r && r < surr3) || r > maxRune {
			r = replacementChar
		}

		if r < surrSelf {
			// Fits in a single code unit.
			chars[written] = uint16(r)
			written++
			continue
		}

		// Needs a surrogate pair: two code units.
		hi, lo := utf16.EncodeRune(r)
		chars[written] = uint16(hi)
		chars[written+1] = uint16(lo)
		written += 2
	}
	return written
}