Replace custom UTF-8 conversion implementations with the uint8arrays library

2025-02-04 16:45:09 +00:00 · 2022-03-25 14:34:56 +11:00 · 2022-03-25 14:34:56 +11:00 · 6ef0550433
commit 6ef0550433
parent 33e6c3fe39
1 changed file with 6 additions and 78 deletions
--- a/src/lib/utf8.ts
+++ b/src/lib/utf8.ts
@@ -1,85 +1,13 @@
 /**
 * Decode bytes to utf-8 string.
 */
-// Thanks https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330
-export function bytesToUtf8(bytes: Uint8Array): string {
-  let i = 0,
-    s = "";
-  while (i < bytes.length) {
-    let c = bytes[i++];
-    if (c > 127) {
-      if (c > 191 && c < 224) {
-        if (i >= bytes.length)
-          throw new Error("UTF-8 decode: incomplete 2-byte sequence");
-        c = ((c & 31) << 6) | (bytes[i++] & 63);
-      } else if (c > 223 && c < 240) {
-        if (i + 1 >= bytes.length)
-          throw new Error("UTF-8 decode: incomplete 3-byte sequence");
-        c = ((c & 15) << 12) | ((bytes[i++] & 63) << 6) | (bytes[i++] & 63);
-      } else if (c > 239 && c < 248) {
-        if (i + 2 >= bytes.length)
-          throw new Error("UTF-8 decode: incomplete 4-byte sequence");
-        c =
-          ((c & 7) << 18) |
-          ((bytes[i++] & 63) << 12) |
-          ((bytes[i++] & 63) << 6) |
-          (bytes[i++] & 63);
-      } else
-        throw new Error(
-          "UTF-8 decode: unknown multi byte start 0x" +
-            c.toString(16) +
-            " at index " +
-            (i - 1)
-        );
-    }
-    if (c <= 0xffff) s += String.fromCharCode(c);
-    else if (c <= 0x10ffff) {
-      c -= 0x10000;
-      s += String.fromCharCode((c >> 10) | 0xd800);
-      s += String.fromCharCode((c & 0x3ff) | 0xdc00);
-    } else
-      throw new Error(
-        "UTF-8 decode: code point 0x" + c.toString(16) + " exceeds UTF-16 reach"
-      );
-  }
-  return s;
-}
+import { fromString } from "uint8arrays/from-string";
+import { toString } from "uint8arrays/to-string";
+
+export const bytesToUtf8 = (bytes: Uint8Array): string =>
+  toString(bytes, "utf-8");

 /**
 * Encode utf-8 string to byte array
 */
-// Thanks https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330
-export function utf8ToBytes(s: string): Uint8Array {
-  let i = 0;
-  const bytes = new Uint8Array(s.length * 4);
-  for (let ci = 0; ci != s.length; ci++) {
-    let c = s.charCodeAt(ci);
-    if (c < 128) {
-      bytes[i++] = c;
-      continue;
-    }
-    if (c < 2048) {
-      bytes[i++] = (c >> 6) | 192;
-    } else {
-      if (c > 0xd7ff && c < 0xdc00) {
-        if (++ci >= s.length)
-          throw new Error("UTF-8 encode: incomplete surrogate pair");
-        const c2 = s.charCodeAt(ci);
-        if (c2 < 0xdc00 || c2 > 0xdfff)
-          throw new Error(
-            "UTF-8 encode: second surrogate character 0x" +
-              c2.toString(16) +
-              " at index " +
-              ci +
-              " out of range"
-          );
-        c = 0x10000 + ((c & 0x03ff) << 10) + (c2 & 0x03ff);
-        bytes[i++] = (c >> 18) | 240;
-        bytes[i++] = ((c >> 12) & 63) | 128;
-      } else bytes[i++] = (c >> 12) | 224;
-      bytes[i++] = ((c >> 6) & 63) | 128;
-    }
-    bytes[i++] = (c & 63) | 128;
-  }
-  return bytes.subarray(0, i);
-}
+export const utf8ToBytes = (s: string): Uint8Array => fromString(s, "utf-8");