Replace custom UTF-8 conversion implementations with `uint8arrays`

This commit is contained in:
Franck Royer 2022-03-25 14:34:56 +11:00
parent 33e6c3fe39
commit 6ef0550433
No known key found for this signature in database
GPG Key ID: A82ED75A8DFC50A4
1 changed file with 6 additions and 78 deletions

View File

@ -1,85 +1,13 @@
/**
 * Decode UTF-8 encoded bytes into a string.
 *
 * Code points above 0xFFFF are emitted as a UTF-16 surrogate pair.
 *
 * @param bytes - The UTF-8 encoded input.
 * @returns The decoded string; an empty input yields an empty string.
 * @throws Error if a multi-byte sequence is truncated, if a lead byte is not a
 *   valid 2-, 3- or 4-byte start, if a continuation byte is not of the form
 *   `10xxxxxx`, or if the decoded code point exceeds 0x10FFFF.
 */
// Thanks https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330
export function bytesToUtf8(bytes: Uint8Array): string {
  // Return the 6 payload bits of the continuation byte at `index`. Bytes that
  // are not of the form 10xxxxxx are rejected instead of being silently masked,
  // which would otherwise turn malformed input into an unrelated character.
  const continuation = (index: number): number => {
    const b = bytes[index];
    if ((b & 0xc0) !== 0x80)
      throw new Error(
        "UTF-8 decode: invalid continuation byte 0x" +
          b.toString(16) +
          " at index " +
          index
      );
    return b & 63;
  };

  let i = 0,
    s = "";
  while (i < bytes.length) {
    let c = bytes[i++];
    if (c > 127) {
      if (c > 191 && c < 224) {
        // 110xxxxx: one continuation byte follows.
        if (i >= bytes.length)
          throw new Error("UTF-8 decode: incomplete 2-byte sequence");
        c = ((c & 31) << 6) | continuation(i++);
      } else if (c > 223 && c < 240) {
        // 1110xxxx: two continuation bytes follow.
        if (i + 1 >= bytes.length)
          throw new Error("UTF-8 decode: incomplete 3-byte sequence");
        c = ((c & 15) << 12) | (continuation(i++) << 6) | continuation(i++);
      } else if (c > 239 && c < 248) {
        // 11110xxx: three continuation bytes follow.
        if (i + 2 >= bytes.length)
          throw new Error("UTF-8 decode: incomplete 4-byte sequence");
        c =
          ((c & 7) << 18) |
          (continuation(i++) << 12) |
          (continuation(i++) << 6) |
          continuation(i++);
      } else
        throw new Error(
          "UTF-8 decode: unknown multi byte start 0x" +
            c.toString(16) +
            " at index " +
            (i - 1)
        );
    }
    if (c <= 0xffff) s += String.fromCharCode(c);
    else if (c <= 0x10ffff) {
      // Split a supplementary code point into a UTF-16 surrogate pair.
      c -= 0x10000;
      s += String.fromCharCode((c >> 10) | 0xd800);
      s += String.fromCharCode((c & 0x3ff) | 0xdc00);
    } else
      throw new Error(
        "UTF-8 decode: code point 0x" + c.toString(16) + " exceeds UTF-16 reach"
      );
  }
  return s;
}
import { fromString } from "uint8arrays/from-string";
import { toString } from "uint8arrays/to-string";
/**
 * Decode bytes to a utf-8 string.
 *
 * Delegates to `toString` from `uint8arrays/to-string`, requesting the
 * "utf-8" encoding.
 *
 * @param bytes - The bytes to decode.
 * @returns The string produced by `toString` for the given bytes.
 */
export const bytesToUtf8 = (bytes: Uint8Array): string => {
  const decoded = toString(bytes, "utf-8");
  return decoded;
};
/**
 * Encode a string as a UTF-8 byte array.
 *
 * The returned array owns an exactly-sized buffer: `result.buffer.byteLength`
 * equals `result.length`, so no scratch space is retained or exposed.
 *
 * Note: an unpaired low surrogate (0xDC00–0xDFFF) is not rejected; it is
 * encoded as a 3-byte sequence like any other code unit at or above 0x800.
 *
 * @param s - The string to encode.
 * @returns The UTF-8 encoding of `s`; an empty string yields an empty array.
 * @throws Error if a high surrogate is the last code unit of the string, or if
 *   it is followed by a code unit that is not a low surrogate.
 */
// Thanks https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330
export function utf8ToBytes(s: string): Uint8Array {
  let i = 0;
  // Upper bound: a single UTF-16 code unit encodes to at most 3 bytes, and a
  // surrogate pair (2 code units) encodes to 4 bytes, so 3 per unit suffices.
  const bytes = new Uint8Array(s.length * 3);
  for (let ci = 0; ci < s.length; ci++) {
    let c = s.charCodeAt(ci);
    if (c < 128) {
      // ASCII: single byte.
      bytes[i++] = c;
      continue;
    }
    if (c < 2048) {
      // 2-byte sequence lead; the trailing byte is written after this branch.
      bytes[i++] = (c >> 6) | 192;
    } else {
      if (c > 0xd7ff && c < 0xdc00) {
        // High surrogate: combine with the following low surrogate.
        if (++ci >= s.length)
          throw new Error("UTF-8 encode: incomplete surrogate pair");
        const c2 = s.charCodeAt(ci);
        if (c2 < 0xdc00 || c2 > 0xdfff)
          throw new Error(
            "UTF-8 encode: second surrogate character 0x" +
              c2.toString(16) +
              " at index " +
              ci +
              " out of range"
          );
        c = 0x10000 + ((c & 0x03ff) << 10) + (c2 & 0x03ff);
        bytes[i++] = (c >> 18) | 240;
        bytes[i++] = ((c >> 12) & 63) | 128;
      } else bytes[i++] = (c >> 12) | 224;
      bytes[i++] = ((c >> 6) & 63) | 128;
    }
    // Final continuation byte shared by the 2-, 3- and 4-byte forms.
    bytes[i++] = (c & 63) | 128;
  }
  // Copy instead of returning a view, so the oversized scratch buffer is not
  // kept alive and is not reachable through `.buffer`.
  return bytes.slice(0, i);
}
/**
 * Encode a utf-8 string to a byte array.
 *
 * Delegates to `fromString` from `uint8arrays/from-string`, requesting the
 * "utf-8" encoding.
 *
 * @param s - The string to encode.
 * @returns The byte array produced by `fromString` for the given string.
 */
export const utf8ToBytes = (s: string): Uint8Array => {
  const encoded = fromString(s, "utf-8");
  return encoded;
};