From 6ef0550433ec90ba8c4ae9135d4a30f159319f21 Mon Sep 17 00:00:00 2001 From: Franck Royer Date: Fri, 25 Mar 2022 14:34:56 +1100 Subject: [PATCH] Replace custom utf-8 convert implementations with `uint8arrays` --- src/lib/utf8.ts | 84 ++++--------------------------------------------- 1 file changed, 6 insertions(+), 78 deletions(-) diff --git a/src/lib/utf8.ts b/src/lib/utf8.ts index cd8a61c689..3d2db495c0 100644 --- a/src/lib/utf8.ts +++ b/src/lib/utf8.ts @@ -1,85 +1,13 @@ /** * Decode bytes to utf-8 string. */ -// Thanks https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330 -export function bytesToUtf8(bytes: Uint8Array): string { - let i = 0, - s = ""; - while (i < bytes.length) { - let c = bytes[i++]; - if (c > 127) { - if (c > 191 && c < 224) { - if (i >= bytes.length) - throw new Error("UTF-8 decode: incomplete 2-byte sequence"); - c = ((c & 31) << 6) | (bytes[i++] & 63); - } else if (c > 223 && c < 240) { - if (i + 1 >= bytes.length) - throw new Error("UTF-8 decode: incomplete 3-byte sequence"); - c = ((c & 15) << 12) | ((bytes[i++] & 63) << 6) | (bytes[i++] & 63); - } else if (c > 239 && c < 248) { - if (i + 2 >= bytes.length) - throw new Error("UTF-8 decode: incomplete 4-byte sequence"); - c = - ((c & 7) << 18) | - ((bytes[i++] & 63) << 12) | - ((bytes[i++] & 63) << 6) | - (bytes[i++] & 63); - } else - throw new Error( - "UTF-8 decode: unknown multi byte start 0x" + - c.toString(16) + - " at index " + - (i - 1) - ); - } - if (c <= 0xffff) s += String.fromCharCode(c); - else if (c <= 0x10ffff) { - c -= 0x10000; - s += String.fromCharCode((c >> 10) | 0xd800); - s += String.fromCharCode((c & 0x3ff) | 0xdc00); - } else - throw new Error( - "UTF-8 decode: code point 0x" + c.toString(16) + " exceeds UTF-16 reach" - ); - } - return s; -} +import { fromString } from "uint8arrays/from-string"; +import { toString } from "uint8arrays/to-string"; + +export const bytesToUtf8 = (bytes: Uint8Array): string => + toString(bytes, "utf-8"); /** * Encode utf-8 string to byte array */ -// Thanks https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330 -export function utf8ToBytes(s: string): Uint8Array { - let i = 0; - const bytes = new Uint8Array(s.length * 4); - for (let ci = 0; ci != s.length; ci++) { - let c = s.charCodeAt(ci); - if (c < 128) { - bytes[i++] = c; - continue; - } - if (c < 2048) { - bytes[i++] = (c >> 6) | 192; - } else { - if (c > 0xd7ff && c < 0xdc00) { - if (++ci >= s.length) - throw new Error("UTF-8 encode: incomplete surrogate pair"); - const c2 = s.charCodeAt(ci); - if (c2 < 0xdc00 || c2 > 0xdfff) - throw new Error( - "UTF-8 encode: second surrogate character 0x" + - c2.toString(16) + - " at index " + - ci + - " out of range" - ); - c = 0x10000 + ((c & 0x03ff) << 10) + (c2 & 0x03ff); - bytes[i++] = (c >> 18) | 240; - bytes[i++] = ((c >> 12) & 63) | 128; - } else bytes[i++] = (c >> 12) | 224; - bytes[i++] = ((c >> 6) & 63) | 128; - } - bytes[i++] = (c & 63) | 128; - } - return bytes.subarray(0, i); -} +export const utf8ToBytes = (s: string): Uint8Array => fromString(s, "utf-8");