rename unicode regexes again.

2025-02-26 09:35:28 +00:00 · 2015-04-26 04:34:32 -07:00 · 2015-04-26 04:34:32 -07:00 · c73fee2f7d
commit c73fee2f7d
parent dafe95d900
2 changed files with 18 additions and 12 deletions
--- a/lib/unicode.js
+++ b/lib/unicode.js
@ -532,12 +532,18 @@ exports.fromCodePoint = function() {
  return result;
 };

+/**
+ * Regexes
+ */
+
+exports.chars = {};
+
 // Double width characters that are _not_ surrogate pairs.
 // NOTE: 0x20000 - 0x2fffd and 0x30000 - 0x3fffd are not necessary for this
 // regex anyway. This regex is used to put a blank char after wide chars to
 // be eaten, however, if this is a surrogate pair, parseContent already adds
 // the extra one char because its length equals 2 instead of 1.
-exports.wideChars = new RegExp('(['
+exports.chars.wide = new RegExp('(['
  + '\\u1100-\\u115f' // Hangul Jamo init. consonants
  + '\\u2329\\u232a'
  + '\\u2e80-\\u303e\\u3040-\\ua4cf' // CJK ... Yi
@ -550,21 +556,21 @@ exports.wideChars = new RegExp('(['
  + '])', 'g');

 // All wide chars including surrogate pairs.
-exports.allWideChars = new RegExp('('
+exports.chars.all = new RegExp('('
  // 0x20000 - 0x2fffd:
  + '[\\ud840-\\ud87f][\\udc00-\\udffd]'
  + '|'
  // 0x30000 - 0x3fffd:
  + '[\\ud880-\\ud8bf][\\udc00-\\udffd]'
  + '|'
-  + exports.wideChars.source.slice(1, -1)
+  + exports.chars.wide.source.slice(1, -1)
  + ')', 'g');

 // Regex to detect a surrogate pair.
-exports.surrogateChars = /[\ud800-\udbff][\udc00-\udfff]/g;
+exports.chars.surrogate = /[\ud800-\udbff][\udc00-\udfff]/g;

 // Regex to find combining characters.
-exports.combiningChars = exports.combiningTable.reduce(function(out, row) {
+exports.chars.combining = exports.combiningTable.reduce(function(out, row) {
  var low, high, range;
  if (row[0] > 0x00ffff) {
    low = exports.fromCodePoint(row[0]);
@ -591,7 +597,7 @@ exports.combiningChars = exports.combiningTable.reduce(function(out, row) {
  return out;
 }, '[');

-exports.combiningChars = new RegExp(exports.combiningChars, 'g');
+exports.chars.combining = new RegExp(exports.chars.combining, 'g');

 function hexify(n) {
  n = n.toString(16);
@ -600,7 +606,7 @@ function hexify(n) {
 }

 /*
-exports.combiningChars = new RegExp(
+exports.chars.combining = new RegExp(
  '['
  + '\\u0300-\\u036f'
  + '\\u0483-\\u0486'
--- a/lib/widget.js
+++ b/lib/widget.js
@ -2424,17 +2424,17 @@ Element.prototype.parseContent = function(noTags) {
    if (this.screen.fullUnicode) {
      // double-width chars will eat the next char after render. create a
      // blank character after it so it doesn't eat the real next char.
-      content = content.replace(unicode.wideChars, '$1_');
+      content = content.replace(unicode.chars.wide, '$1_');
    } else {
      // no double-width: replace them with question-marks.
-      content = content.replace(unicode.allWideChars, '??');
+      content = content.replace(unicode.chars.all, '??');
      // delete combining characters since they're 0-width anyway.
      // NOTE: We could drop this, the non-surrogates would get changed to ? by
      // the unicode filter, and surrogates changed to ? by the surrogate
      // regex. however, the user might expect them to be 0-width.
-      content = content.replace(unicode.combiningChars, '');
+      content = content.replace(unicode.chars.combining, '');
      // no surrogate pairs: replace them with question-marks.
-      content = content.replace(unicode.surrogateChars, '?');
+      content = content.replace(unicode.chars.surrogate, '?');
    }

    if (!noTags) {
@ -2769,7 +2769,7 @@ main:
      }
      // Pad the end of the lines if the surrogate is not a double-width char.
      // var surrogates = out[i].length - punycode.ucs2.decode(out[i]).length;
-      var surrogates = out[i].match(unicode.surrogateChars);
+      var surrogates = out[i].match(unicode.chars.surrogate);
      if (surrogates && surrogates.length) {
        for (var j = 0; j < surrogates.length; j++) {
          var cwid = unicode.charWidth(surrogates[j], 0);