From ccca1092e7bf3ef9b7cabd9c7b2d539f97eb889e Mon Sep 17 00:00:00 2001
From: Christopher Jeffrey <chjjeffrey@gmail.com>
Date: Sun, 26 Apr 2015 04:23:32 -0700
Subject: [PATCH] add regex for all wide chars.

---
 lib/unicode.js | 10 ++++++++++
 lib/widget.js  |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/unicode.js b/lib/unicode.js
index 51a73ef..b21ae77 100644
--- a/lib/unicode.js
+++ b/lib/unicode.js
@@ -549,6 +549,16 @@ exports.wideChars = new RegExp('(['
   + '\\uffe0-\\uffe6'
   + '])', 'g');
 
+exports.allWide = new RegExp('('
+  // 0x20000 - 0x2fffd: only the last lead (d87f) stops at trail dffd.
+  + '[\\ud840-\\ud87e][\\udc00-\\udfff]|\\ud87f[\\udc00-\\udffd]'
+  + '|'
+  // 0x30000 - 0x3fffd: only the last lead (d8bf) stops at trail dffd.
+  + '[\\ud880-\\ud8be][\\udc00-\\udfff]|\\ud8bf[\\udc00-\\udffd]'
+  + '|'
+  + exports.wideChars.source.slice(1, -1)
+  + ')', 'g');
+
 // Regex to detect a surrogate pair.
 exports.surrogate = /[\ud800-\udbff][\udc00-\udfff]/g;
 
diff --git a/lib/widget.js b/lib/widget.js
index 803ff4a..f1f7f5e 100644
--- a/lib/widget.js
+++ b/lib/widget.js
@@ -2427,7 +2427,7 @@ Element.prototype.parseContent = function(noTags) {
       content = content.replace(unicode.wideChars, '$1_');
     } else {
       // no double-width: replace them with question-marks.
-      content = content.replace(unicode.wideChars, '??');
+      content = content.replace(unicode.allWide, '??');
       // delete combining characters since they're 0-width anyway.
       // NOTE: We could drop this, the non-surrogates would get changed to ? by
       // the unicode filter, and surrogates changed to ? by the surrogate