// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package seq import "unicode/utf16" // Based heavily on package unicode/utf16 from the Go standard library. const ( replacementChar = '\uFFFD' // Unicode replacement character maxRune = '\U0010FFFF' // Maximum valid Unicode code point. ) const ( // 0xd800-0xdc00 encodes the high 10 bits of a pair. // 0xdc00-0xe000 encodes the low 10 bits of a pair. // the value is those 20 bits plus 0x10000. surr1 = 0xd800 surr2 = 0xdc00 surr3 = 0xe000 surrSelf = 0x10000 ) // UTF16Encode utf16 encodes s into chars. It returns the resulting // length in units of uint16. It is assumed that the chars slice // has enough room for the encoded string. func UTF16Encode(s string, chars []uint16) int { n := 0 for _, v := range s { switch { case v < 0, surr1 <= v && v < surr3, v > maxRune: v = replacementChar fallthrough case v < surrSelf: chars[n] = uint16(v) n += 1 default: // surrogate pair, two uint16 values r1, r2 := utf16.EncodeRune(v) chars[n] = uint16(r1) chars[n+1] = uint16(r2) n += 2 } } return n }